diff options
25 files changed, 638 insertions, 665 deletions
diff --git a/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx b/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx index a6981b8a521..5e09a5dec0f 100644 --- a/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx +++ b/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx @@ -2,8 +2,6 @@ let traceID = ''; let processes = {}; let output = {}; let traceStartTimestamp = 0; -let topSpanId = ''; -let parentID = ''; // Generates a random hex string of size "size" const genRanHex = (size) => @@ -20,54 +18,77 @@ export default function transform(trace) { let temp = trace['trace']['children']; let spans = findChildren(temp); traceStartTimestamp = findTraceStartTime(spans); - topSpanId = genRanHex(16); let topSpanFirstHalf = createNewSpan(traceStartTimestamp)[0]; data.push(topSpanFirstHalf); const retrieved = findLogsAndChildren(spans, topSpanFirstHalf); const logs = retrieved['logs']; - const children = retrieved['children']; - traverseLogs(logs); - createChildren(children); + const indexes = retrieved['indexes']; + traverseLogs(logs, indexes); return output; } +function traverseChildren(span, logs, children, indexes, parent) { + let data = output['data'][0]['spans']; + let logSpan; + let spanTimestamp = span['timestamp']; + if (span.hasOwnProperty('children')) { + // Create a new parent span so that the timeline for the logs are correct + let duration = + (span['children'][span['children'].length - 1]['timestamp'] - + span['children'][0]['timestamp']) * + 1000; + parent['duration'] = duration; + logSpan = createNewSpan( + traceStartTimestamp + spanTimestamp, + duration, + 'p0', + parent['operationName'], + [{ refType: 'CHILD_OF', traceID: traceID, spanID: parent['spanID'] }] + ); + data.push(logSpan); + let log = []; + for (let x of span['children']) { + if (x.hasOwnProperty('children')) { + traverseChildren(x, logs, children, indexes, logSpan); + } else if (Array.isArray(x['message'])) { + if (log.length > 0) { + // finished moving down the search chain + // create a new array for holding the logs that represent moving up the search chain + logs.push(log); + indexes.push(data.indexOf(parent)); + log = []; + } + createChildren(x['message'], parent['spanID']); + } else { + // only add logs with a timestamp + if (x.hasOwnProperty('timestamp')) { + log.push(x); + } + } + } + indexes.push(data.indexOf(parent)); + logs.push(log); + } +} + function findLogsAndChildren(spans, topSpanFirstHalf) { let logs = []; let children = []; + let indexes = []; let data = output['data'][0]['spans']; - //let hitQuery = false; - //let topSpanSecondHalf = createNewSpan(); - //let secondHalfDuration = 0; - //output['data'][0]['spans'].push(topSpanSecondHalf); - //let firstHitSecondHalf = true; - for (let i = 0; i < spans.length - 1; i++) { + let totalDuration = findDuration(spans); + topSpanFirstHalf['duration'] = totalDuration; + for (let i = 0; i < spans.length; i++) { if (spans[i].hasOwnProperty('children')) { - let a = spans[i]['timestamp']; - topSpanFirstHalf = createNewSpan(traceStartTimestamp + a); - //firstHitSecondHalf = true; - //topSpanSecondHalf = createNewSpan(); - //output['data'][0]['spans'].push(topSpanSecondHalf); - let log = []; - for (let x of spans[i]['children']) { - if (Array.isArray(x['message'])) { - if (log.length > 0) { - // finished moving down the search chain - // create a new array for holding the logs that represent moving up the search chain - logs.push(log); - log = []; - } - //hitQuery = true; - children.push(x['message']); - } else { - // only add logs with a timestamp - if (x.hasOwnProperty('timestamp')) { - log.push(x); - } - } - } - logs.push(log); + traverseChildren( + spans[i], + logs, + children, + indexes, + data[data.length - 1] + ); } else if ( spans[i].hasOwnProperty('message') && spans[i].hasOwnProperty('timestamp') @@ -88,185 +109,149 @@ function findLogsAndChildren(spans, topSpanFirstHalf) { duration = (spans[i + 1]['timestamp'] - spans[i]['timestamp']) * 1000; duration = duration === 0 ? 1 : duration; } - topSpanFirstHalf['duration'] = topSpanFirstHalf['duration'] + duration; span['duration'] = duration; - // if (hitQuery) { - // if (firstHitSecondHalf) { - // secondHalfDuration = span['timestamp'] * 1000; - // topSpanSecondHalf['startTime'] = - // traceStartTimestamp + secondHalfDuration; - // firstHitSecondHalf = false; - // } - // topSpanSecondHalf['duration'] = - // span['timestamp'] * 1000 - secondHalfDuration; - // topSpanSecondHalf['logs'].push({ - // timestamp: traceStartTimestamp + span['timestamp'] * 1000, - // fields: [{ key: 'message', type: 'string', value: span['message'] }], - // }); - // } else { - // topSpanFirstHalf['duration'] = span['timestamp'] * 1000; - // topSpanFirstHalf['logs'].push({ - // timestamp: traceStartTimestamp + span['timestamp'] * 1000, - // fields: [{ key: 'message', type: 'string', value: span['message'] }], - // }); - // } } } - return { logs: logs, children: children }; + return { logs: logs, indexes: indexes }; } -function traverseLogs(logs) { - let first = true; +function traverseLogs(logs, indexes) { let data = output['data'][0]['spans']; - for (let log of logs) { + let previous; + for (let i = 0; i < logs.length; i++) { + previous = data[indexes[i]]; + let log = logs[i]; let logStartTimestamp = traceStartTimestamp + log[0]['timestamp'] * 1000; let logDuration = (log[log.length - 1]['timestamp'] - log[0]['timestamp']) * 1000; if (logDuration === 0) { logDuration = 1; } - let spanID = genRanHex(16); - if (first) { - parentID = spanID; - first = false; - } - let temp = createNewSpan( - logStartTimestamp, - logDuration, - 'p0', - 'test' - //[{ refType: 'CHILD_OF', traceID: traceID, spanID: topSpanId }] - ); + let temp = createNewSpan(logStartTimestamp, logDuration, 'p0', 'test', [ + { refType: 'CHILD_OF', traceID: traceID, spanID: previous['spanID'] }, + ]); let childSpan = temp[0]; let childSpanID = temp[1]; data.push(childSpan); - for (let i = 0; i < log.length - 1; i++) { - if (log[i].hasOwnProperty('message')) { - let logPointStart = traceStartTimestamp + log[i]['timestamp'] * 1000; + for (let k = 0; k < log.length - 1; k++) { + if (log[k].hasOwnProperty('message')) { + let logPointStart = traceStartTimestamp + log[k]['timestamp'] * 1000; let logPointDuration; - if (i > log.length - 1) { + if (k > log.length - 1) { logPointDuration = 1; } else { logPointDuration = - (log[i + 1]['timestamp'] - log[i]['timestamp']) * 1000; + (log[k + 1]['timestamp'] - log[k]['timestamp']) * 1000; logPointDuration = logPointDuration === 0 ? 1 : logPointDuration; } let logSpan = createNewSpan( logPointStart, logPointDuration, 'p0', - log[i]['message'], + log[k]['message'], [{ refType: 'CHILD_OF', traceID: traceID, spanID: childSpanID }] )[0]; data.push(logSpan); - // childSpan['logs'].push({ - // timestamp: traceStartTimestamp + log[i]['timestamp'] * 1000, - // fields: [ - // { key: 'message', type: 'string', value: log[i]['message'] }, - // ], - // }); } } } } -function createChildren(children) { - for (let i = 0; i < children.length; i++) { - let child = children[i][0]; - let processKey = `p${i + 1}`; - processes[processKey] = { serviceName: `Span${i}`, tags: [] }; - let spanID = genRanHex(16); - let data = output['data'][0]['spans']; - let startTimestamp = Date.parse(child['start_time']) * 1000; - let newSpan = { +function createChildren(children, parentID) { + let child = children[0]; + let processKey = genRanHex(5); + processes[processKey] = { serviceName: genRanHex(3), tags: [] }; + let spanID = genRanHex(16); + let data = output['data'][0]['spans']; + let startTimestamp = Date.parse(child['start_time']) * 1000; + let newSpan = { + traceID: traceID, + spanID: spanID, + operationName: 'something', + startTime: startTimestamp, + duration: child['duration_ms'] * 1000, + references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: parentID }], + tags: [], + logs: [], + processID: processKey, + }; + data.push(newSpan); + let traces = child['traces']; + for (let k = 0; k < traces.length; k++) { + let trace = traces[k]; + let traceTimestamp = trace['timestamp_ms']; + let events; + let firstEvent; + let duration; + if (trace['tag'] === 'query_execution') { + events = trace['threads'][0]['traces']; + firstEvent = events[0]; + duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000; + } else if (trace['tag'] === 'query_execution_plan') { + events = []; + let nextTrace = traces[k + 1]; + firstEvent = trace; + // query execution plan has no events, duration must therefore be found using the next trace + if (nextTrace['tag'] === 'query_execution') { + duration = + (nextTrace['threads'][0]['traces'][0]['timestamp_ms'] - + traceTimestamp) * + 1000; + } else { + duration = (nextTrace['timestamp_ms'] - traceTimestamp) * 1000; + } + } else { + events = trace['traces']; + firstEvent = events[0]; + duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000; + } + let childSpanID = genRanHex(16); + let childSpan = { traceID: traceID, - spanID: spanID, - operationName: `query${i}`, - startTime: startTimestamp, - duration: child['duration_ms'] * 1000, - references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: parentID }], + spanID: childSpanID, + operationName: trace['tag'], + startTime: startTimestamp + firstEvent['timestamp_ms'] * 1000, + duration: duration, + references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: spanID }], tags: [], logs: [], processID: processKey, }; - data.push(newSpan); - let traces = child['traces']; - for (let k = 0; k < traces.length; k++) { - let trace = traces[k]; - let traceTimestamp = trace['timestamp_ms']; - let events; - let firstEvent; - let duration; - if (trace['tag'] === 'query_execution') { - events = trace['threads'][0]['traces']; - firstEvent = events[0]; - duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000; - } else if (trace['tag'] === 'query_execution_plan') { - events = []; - let nextTrace = traces[k + 1]; - firstEvent = trace; - // query execution plan has no events, duration must therefore be found using the next trace - if (nextTrace['tag'] === 'query_execution') { - duration = - (nextTrace['threads'][0]['traces'][0]['timestamp_ms'] - - traceTimestamp) * - 1000; - } else { - duration = (nextTrace['timestamp_ms'] - traceTimestamp) * 1000; - } - } else { - events = trace['traces']; - firstEvent = events[0]; - duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000; - } - let childSpanID = genRanHex(16); - let childSpan = { - traceID: traceID, - spanID: childSpanID, - operationName: trace['tag'], - startTime: startTimestamp + firstEvent['timestamp_ms'] * 1000, - duration: duration, - references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: spanID }], - tags: [], - logs: [], - processID: processKey, - }; - data.push(childSpan); - if (events.length > 0) { - for (let j = 0; j < events.length; j++) { - let event = events[j]; - let eventID = genRanHex(16); - let eventStart = event['timestamp_ms']; - let operationName; - if (event.hasOwnProperty('event')) { - operationName = event['event']; - if ( - operationName === 'Complete query setup' || - operationName === 'MatchThread::run Done' - ) { - duration = (traceTimestamp - eventStart) * 1000; - } else { - duration = (events[j + 1]['timestamp_ms'] - eventStart) * 1000; - } + data.push(childSpan); + if (events.length > 0) { + for (let j = 0; j < events.length; j++) { + let event = events[j]; + let eventID = genRanHex(16); + let eventStart = event['timestamp_ms']; + let operationName; + if (event.hasOwnProperty('event')) { + operationName = event['event']; + if ( + operationName === 'Complete query setup' || + operationName === 'MatchThread::run Done' + ) { + duration = (traceTimestamp - eventStart) * 1000; } else { - operationName = event['tag']; duration = (events[j + 1]['timestamp_ms'] - eventStart) * 1000; } - let eventSpan = { - traceID: traceID, - spanID: eventID, - operationName: operationName, - startTime: startTimestamp + eventStart * 1000, - duration: duration, - references: [ - { refType: 'CHILD_OF', traceID: traceID, spanID: childSpanID }, - ], - tags: [], - logs: [], - processID: processKey, - }; - data.push(eventSpan); + } else { + operationName = event['tag']; + duration = (events[j + 1]['timestamp_ms'] - eventStart) * 1000; } + let eventSpan = { + traceID: traceID, + spanID: eventID, + operationName: operationName, + startTime: startTimestamp + eventStart * 1000, + duration: duration, + references: [ + { refType: 'CHILD_OF', traceID: traceID, spanID: childSpanID }, + ], + tags: [], + logs: [], + processID: processKey, + }; + data.push(eventSpan); } } } @@ -306,7 +291,6 @@ function findTraceStartTime(spans) { return startTime; } -//TODO: remove if not needed later function findDuration(spans) { let notFound = true; let duration = 0; @@ -315,16 +299,15 @@ function findDuration(spans) { if (spans[i].hasOwnProperty('timestamp')) { duration = spans[i]['timestamp']; notFound = false; - } else { - i--; } + i--; } - return duration; + return duration * 1000; } function createNewSpan( startTime = 0, - duration = 0, + duration = 1, processID = 'p0', operationName = 'Complete', references = [] diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index c6238149be1..74dc52f79ec 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -70,7 +70,7 @@ public class Flags { public static final UnboundBooleanFlag KEEP_STORAGE_NODE_UP = defineFeatureFlag( "keep-storage-node-up", true, - List.of("hakonhall"), "2022-07-07", "2022-08-07", + List.of("hakonhall"), "2022-07-07", "2022-09-07", "Whether to leave the storage node (with wanted state) UP while the node is permanently down.", "Takes effect immediately for nodes transitioning to permanently down.", ZONE_ID, APPLICATION_ID); @@ -119,7 +119,7 @@ public class Flags { public static final UnboundBooleanFlag USE_THREE_PHASE_UPDATES = defineFeatureFlag( "use-three-phase-updates", false, - List.of("vekterli"), "2020-12-02", "2022-08-01", + List.of("vekterli"), "2020-12-02", "2022-08-15", "Whether to enable the use of three-phase updates when bucket replicas are out of sync.", "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); @@ -245,7 +245,7 @@ public class Flags { public static final UnboundIntFlag MAX_ACTIVATION_INHIBITED_OUT_OF_SYNC_GROUPS = defineIntFlag( "max-activation-inhibited-out-of-sync-groups", 0, - List.of("vekterli"), "2021-02-19", "2022-08-01", + List.of("vekterli"), "2021-02-19", "2022-08-15", "Allows replicas in up to N content groups to not be activated " + "for query visibility if they are out of sync with a majority of other replicas", "Takes effect at redeployment", @@ -253,14 +253,14 @@ public class Flags { public static final UnboundIntFlag MAX_CONCURRENT_MERGES_PER_NODE = defineIntFlag( "max-concurrent-merges-per-node", 16, - List.of("balder", "vekterli"), "2021-06-06", "2022-08-01", + List.of("balder", "vekterli"), "2021-06-06", "2022-08-15", "Specifies max concurrent merges per content node.", "Takes effect at redeploy", ZONE_ID, APPLICATION_ID); public static final UnboundIntFlag MAX_MERGE_QUEUE_SIZE = defineIntFlag( "max-merge-queue-size", 100, - List.of("balder", "vekterli"), "2021-06-06", "2022-08-01", + List.of("balder", "vekterli"), "2021-06-06", "2022-08-15", "Specifies max size of merge queue.", "Takes effect at redeploy", ZONE_ID, APPLICATION_ID); @@ -339,7 +339,7 @@ public class Flags { public static final UnboundStringFlag MERGE_THROTTLING_POLICY = defineStringFlag( "merge-throttling-policy", "STATIC", - List.of("vekterli"), "2022-01-25", "2022-08-01", + List.of("vekterli"), "2022-01-25", "2022-08-15", "Sets the policy used for merge throttling on the content nodes. " + "Valid values: STATIC, DYNAMIC", "Takes effect at redeployment", @@ -347,7 +347,7 @@ public class Flags { public static final UnboundDoubleFlag PERSISTENCE_THROTTLING_WS_DECREMENT_FACTOR = defineDoubleFlag( "persistence-throttling-ws-decrement-factor", 1.2, - List.of("vekterli"), "2022-01-27", "2022-08-01", + List.of("vekterli"), "2022-01-27", "2022-08-15", "Sets the dynamic throttle policy window size decrement factor for persistence " + "async throttling. Only applies if DYNAMIC policy is used.", "Takes effect on redeployment", @@ -355,7 +355,7 @@ public class Flags { public static final UnboundDoubleFlag PERSISTENCE_THROTTLING_WS_BACKOFF = defineDoubleFlag( "persistence-throttling-ws-backoff", 0.95, - List.of("vekterli"), "2022-01-27", "2022-08-01", + List.of("vekterli"), "2022-01-27", "2022-08-15", "Sets the dynamic throttle policy window size backoff for persistence " + "async throttling. Only applies if DYNAMIC policy is used. Valid range [0, 1]", "Takes effect on redeployment", diff --git a/hosted-tenant-base/pom.xml b/hosted-tenant-base/pom.xml index 8bc98cf0733..58ad178794b 100644 --- a/hosted-tenant-base/pom.xml +++ b/hosted-tenant-base/pom.xml @@ -340,6 +340,12 @@ <delete dir="target/application-test/src" /> <copy file="target/${project.artifactId}-tests.jar" todir="target/application-test/components/" /> + + <!-- Copy any additional application bundles to the test package --> + <copy todir="target/application-test/components"> + <fileset dir="target/application/components" includes="*.jar" excludes="${project.artifactId}-deploy.jar" /> + </copy> + <zip destfile="target/application-test.zip" basedir="target/application-test/" /> </tasks> </configuration> diff --git a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java index 24cd9245b61..520e22de136 100644 --- a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java +++ b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java @@ -19,5 +19,6 @@ public class SecurityHeadersResponseFilter implements SecurityResponseFilter { response.setHeader("Strict-Transport-Security", "max-age=31536000; includeSubDomains"); response.setHeader("X-Content-Type-Options", "nosniff"); response.setHeader("X-Frame-Options", "DENY"); + response.setHeader("Referrer-Policy", "strict-origin-when-cross-origin"); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java index da93083a6de..c78d60e9950 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java @@ -15,7 +15,7 @@ import java.net.SocketTimeoutException; public class ConnectionException extends ConvergenceException { private ConnectionException(String message, Throwable cause) { - super(message, cause); + super(message, cause, true); } /** diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java index 4de7582fb7b..53e15b6c647 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java @@ -14,20 +14,15 @@ public class HttpException extends ConvergenceException { private final boolean isRetryable; private HttpException(int statusCode, String message, boolean isRetryable) { - super("HTTP status code " + statusCode + ": " + message); + super("HTTP status code " + statusCode + ": " + message, null, !isRetryable); this.isRetryable = isRetryable; } private HttpException(Response.Status status, String message, boolean isRetryable) { - super(status.toString() + " (" + status.getStatusCode() + "): " + message); + super(status.toString() + " (" + status.getStatusCode() + "): " + message, null, !isRetryable); this.isRetryable = isRetryable; } - private HttpException(String message) { - super(message); - this.isRetryable = false; - } - boolean isRetryable() { return isRetryable; } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java index a0b0499bb1e..636813a2169 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java @@ -5,10 +5,6 @@ import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; public class NodeRepositoryException extends ConvergenceException { public NodeRepositoryException(String message) { - super(message); - } - - public NodeRepositoryException(String message, Exception exception) { - super(message, exception); + super(message, null, true); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java index 9b8a749c33d..917b65b606c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java @@ -5,7 +5,12 @@ import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; @SuppressWarnings("serial") public class OrchestratorException extends ConvergenceException { + /** Creates a transient convergence exception. */ public OrchestratorException(String message) { - super(message); + this(message, true); + } + + protected OrchestratorException(String message, boolean isError) { + super(message, null, isError); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java index 281d43e0afc..858bce27ed8 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java @@ -59,7 +59,7 @@ public class OrchestratorImpl implements Orchestrator { } catch (HttpException e) { throw new OrchestratorException("Failed to suspend " + hostName + ": " + e.toString()); } catch (ConnectionException e) { - throw new ConvergenceException("Failed to suspend " + hostName + ": " + e.getMessage()); + throw ConvergenceException.ofTransient("Failed to suspend " + hostName + ": " + e.getMessage()); } catch (RuntimeException e) { throw new RuntimeException("Got error on suspend", e); } @@ -105,7 +105,7 @@ public class OrchestratorImpl implements Orchestrator { } catch (HttpException e) { throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString()); } catch (ConnectionException e) { - throw new ConvergenceException("Failed to batch suspend for " + parentHostName + ": " + e.getMessage()); + throw ConvergenceException.ofTransient("Failed to batch suspend for " + parentHostName + ": " + e.getMessage()); } catch (RuntimeException e) { throw new RuntimeException("Got error on batch suspend for " + parentHostName + ", with nodes " + hostNames, e); } @@ -126,7 +126,7 @@ public class OrchestratorImpl implements Orchestrator { } catch (HttpException e) { throw new OrchestratorException("Failed to resume " + hostName + ": " + e.toString()); } catch (ConnectionException e) { - throw new ConvergenceException("Failed to resume " + hostName + ": " + e.getMessage()); + throw ConvergenceException.ofTransient("Failed to resume " + hostName + ": " + e.getMessage()); } catch (RuntimeException e) { throw new RuntimeException("Got error on resume", e); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java index 3b016aac03f..a6a54807e56 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java @@ -4,6 +4,6 @@ package com.yahoo.vespa.hosted.node.admin.configserver.orchestrator; @SuppressWarnings("serial") public class OrchestratorNotFoundException extends OrchestratorException { public OrchestratorNotFoundException(String message) { - super(message); + super(message, true); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index c893f7ffee4..062a7ce018d 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -120,7 +120,7 @@ public class StorageMaintainer { String[] results = output.split("\t"); if (results.length != 2) { - throw new ConvergenceException("Result from disk usage command not as expected: " + output); + throw ConvergenceException.ofError("Result from disk usage command not as expected: " + output); } return DiskSize.of(Long.parseLong(results[0]), DiskSize.Unit.kiB); @@ -226,11 +226,11 @@ public class StorageMaintainer { String output = uncheck(() -> Files.readAllLines(Paths.get("/proc/cpuinfo")).stream() .filter(line -> line.startsWith("microcode")) .findFirst() - .orElseThrow(() -> new ConvergenceException("No microcode information found in /proc/cpuinfo"))); + .orElseThrow(() -> ConvergenceException.ofError("No microcode information found in /proc/cpuinfo"))); String[] results = output.split(":"); if (results.length != 2) { - throw new ConvergenceException("Result from detect microcode command not as expected: " + output); + throw ConvergenceException.ofError("Result from detect microcode command not as expected: " + output); } return results[1].trim(); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java index 45dbfd07209..0933f22dee3 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java @@ -51,7 +51,7 @@ public class CoreCollector { Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput()); if (! matcher.find()) { - throw new ConvergenceException(String.format("Failed to extract binary path from GDB, result: %s, command: %s", + throw ConvergenceException.ofError(String.format("Failed to extract binary path from GDB, result: %s, command: %s", asString(result), Arrays.toString(wrappedCommand))); } return matcher.group("path").split(" ")[0]; @@ -62,7 +62,7 @@ public class CoreCollector { try { CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); if (result.getExitCode() != 0) { - throw new ConvergenceException("file command failed with " + asString(result)); + throw ConvergenceException.ofError("file command failed with " + asString(result)); } Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput()); @@ -89,7 +89,7 @@ public class CoreCollector { CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); if (result.getExitCode() != 0) - throw new ConvergenceException("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command)); + throw ConvergenceException.ofError("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command)); return List.of(result.getOutput().split("\n")); } @@ -99,7 +99,7 @@ public class CoreCollector { CommandResult result = container.executeCommandInContainer(context, context.users().root(), command); if (result.getExitCode() != 0) - throw new ConvergenceException("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command)); + throw ConvergenceException.ofError("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command)); return List.of(result.getOutput().split("\n")); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java index 6295765a95f..ece494a34d7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java @@ -95,7 +95,7 @@ public class CoredumpHandler { .map(FileFinder.FileAttributes::filename) .toList(); if (!pendingCores.isEmpty()) - throw new ConvergenceException(String.format("Cannot process %s coredumps: Still being written", + throw ConvergenceException.ofError(String.format("Cannot process %s coredumps: Still being written", pendingCores.size() < 5 ? pendingCores : pendingCores.size())); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java index 16a5eb022ad..c1b86fc7fe2 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java @@ -8,11 +8,34 @@ package com.yahoo.vespa.hosted.node.admin.nodeadmin; */ @SuppressWarnings("serial") public class ConvergenceException extends RuntimeException { - public ConvergenceException(String message) { - super(message); + /** Create an exception that will NOT increment the monitored unhandled_exceptions metric. */ + public static ConvergenceException ofTransient(String message) { return ofTransient(message, null); } + + /** Create an exception that will NOT increment the monitored unhandled_exceptions metric. */ + public static ConvergenceException ofTransient(String message, Throwable t) { return new ConvergenceException(message, t, false); } + + /** Create an exception that increments the monitored unhandled_exceptions metric. */ + public static ConvergenceException ofError(String message) { return ofError(message, null); } + + /** Create an exception that increments the monitored unhandled_exceptions metric. */ + public static ConvergenceException ofError(String message, Throwable t) { return new ConvergenceException(message, t, true); } + + /** Create an exception with the same transient/error as the cause. */ + public static ConvergenceException ofNested(String message, ConvergenceException cause) { return new ConvergenceException(message, cause, cause.isError); } + + private final boolean isError; + + /** @param isError whether the exception should increment the monitored unhandled_exception metric. */ + protected ConvergenceException(String message, boolean isError) { + this(message, null, isError); } - public ConvergenceException(String message, Throwable t) { + /** @param isError whether the exception should increment the monitored unhandled_exception metric. */ + protected ConvergenceException(String message, Throwable t, boolean isError) { super(message, t); + this.isError = isError; } + + /** Whether the exception signals an error someone may want to look at, or whether it is expected to be transient (false). */ + public boolean isError() { return isError; } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java index dda404797d9..314844dc6eb 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -110,7 +110,7 @@ public class NodeAdminStateUpdater { if (hostIsActiveInNR) orchestrator.resume(hostHostname); - throw new ConvergenceException("Timed out trying to freeze all nodes: will force an unfrozen tick"); + throw ConvergenceException.ofTransient("Timed out trying to freeze all nodes: will force an unfrozen tick"); } boolean wantFrozen = wantedState != RESUMED; @@ -118,7 +118,7 @@ public class NodeAdminStateUpdater { currentState = TRANSITIONING; if (!nodeAdmin.setFrozen(wantFrozen)) - throw new ConvergenceException("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen")); + throw ConvergenceException.ofTransient("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen")); switch (wantedState) { case RESUMED: diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 09bc58bdaa2..6408132fe0b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -244,7 +244,7 @@ public class NodeAgentImpl implements NodeAgent { hasResumedNode = false; context.log(logger, "Container successfully started, new containerState is " + containerState); return containerOperations.getContainer(context).orElseThrow(() -> - new ConvergenceException("Did not find container that was just started")); + ConvergenceException.ofError("Did not find container that was just started")); } private Optional<Container> removeContainerIfNeededUpdateContainerState( @@ -399,7 +399,7 @@ public class NodeAgentImpl implements NodeAgent { // Only update CPU resources containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources().memoryBytes())); return containerOperations.getContainer(context).orElseThrow(() -> - new ConvergenceException("Did not find container that was just updated")); + ConvergenceException.ofError("Did not find container that was just updated")); } private ContainerResources getContainerResources(NodeAgentContext context) { @@ -435,6 +435,8 @@ public class NodeAgentImpl implements NodeAgent { context.log(logger, Level.INFO, "Converged"); } catch (ConvergenceException e) { context.log(logger, e.getMessage()); + if (e.isError()) + numberOfUnhandledException++; } catch (Throwable e) { numberOfUnhandledException++; context.log(logger, Level.SEVERE, "Unhandled exception, ignoring", e); @@ -501,7 +503,7 @@ public class NodeAgentImpl implements NodeAgent { Duration timeLeft = Duration.between(clock.instant(), firstSuccessfulHealthCheckInstant.get().plus(warmUpDuration(context))); if (!container.get().resources().equalsCpu(getContainerResources(context))) - throw new ConvergenceException("Refusing to resume until warm up period ends (" + + throw ConvergenceException.ofTransient("Refusing to resume until warm up period ends (" + (timeLeft.isNegative() ? "next tick" : "in " + timeLeft) + ")"); } serviceDumper.processServiceDumpRequest(context); @@ -536,7 +538,7 @@ public class NodeAgentImpl implements NodeAgent { nodeRepository.setNodeState(context.hostname().value(), NodeState.ready); break; default: - throw new ConvergenceException("UNKNOWN STATE " + node.state().name()); + throw ConvergenceException.ofError("UNKNOWN STATE " + node.state().name()); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java index 07e172c5117..148d80c9803 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java @@ -67,7 +67,7 @@ public interface IPAddresses { if (ipv6addresses.size() <= 1) return ipv6addresses.stream().findFirst(); String addresses = ipv6addresses.stream().map(InetAddresses::toAddrString).collect(Collectors.joining(",")); - throw new ConvergenceException( + throw ConvergenceException.ofError( String.format( "Multiple IPv6 addresses found: %s. Perhaps a missing DNS entry or multiple AAAA records in DNS?", addresses)); @@ -103,7 +103,7 @@ public interface IPAddresses { if (siteLocalIPv4Addresses.size() == 1) return Optional.of(siteLocalIPv4Addresses.get(0)); String addresses = ipv4Addresses.stream().map(InetAddresses::toAddrString).collect(Collectors.joining(",")); - throw new ConvergenceException( + throw ConvergenceException.ofError( String.format( "Multiple IPv4 addresses found: %s. Perhaps a missing DNS entry or multiple A records in DNS?", addresses)); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index b13a3a184e5..e4c2f595164 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -334,7 +334,7 @@ public class NodeAgentImplTest { when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node)); when(containerOperations.pullImageAsyncIfNeeded(any(), eq(dockerImage), any())).thenReturn(false); - doThrow(new ConvergenceException("Connection refused")).doNothing() + doThrow(ConvergenceException.ofTransient("Connection refused")).doNothing() .when(healthChecker).verifyHealth(eq(context)); try { @@ -640,7 +640,7 @@ public class NodeAgentImplTest { InOrder inOrder = inOrder(orchestrator, containerOperations); - ConvergenceException healthCheckException = new ConvergenceException("Not yet up"); + ConvergenceException healthCheckException = ConvergenceException.ofTransient("Not yet up"); doThrow(healthCheckException).when(healthChecker).verifyHealth(any()); for (int i = 0; i < 3; i++) { try { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java index bb441c6621c..d8becdf7c80 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java @@ -1,22 +1,16 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.test.ManualClock; -import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock; import org.junit.Test; import java.time.Duration; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; -import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -28,40 +22,27 @@ public class AutoscalingIntegrationTest { @Test public void testComponentIntegration() { - NodeResources nodes = new NodeResources(1, 10, 100, 1); - NodeResources hosts = new NodeResources(3, 20, 200, 1); - - AutoscalingTester tester = new AutoscalingTester(hosts); - MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(tester.nodeRepository(), + var fixture = AutoscalingTester.fixture() + .hostResources(new NodeResources(3, 20, 200, 1)) + .initialResources(Optional.of(new ClusterResources(2, 1, + new NodeResources(1, 10, 100, 1)))) + .build(); + MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(fixture.tester().nodeRepository(), new OrchestratorMock(), - new MockHttpClient(tester.clock())); - Autoscaler autoscaler = new Autoscaler(tester.nodeRepository()); - - ApplicationId application1 = AutoscalingTester.applicationId("test1"); - ClusterSpec cluster1 = AutoscalingTester.clusterSpec(ClusterSpec.Type.container, "test"); - Set<String> hostnames = tester.deploy(application1, cluster1, 2, 1, nodes) - .stream().map(HostSpec::hostname) - .collect(Collectors.toSet()); + new MockHttpClient(fixture.tester().clock())); + Autoscaler autoscaler = new Autoscaler(fixture.tester().nodeRepository()); + // The metrics response (below) hardcodes these hostnames: - assertEquals(Set.of("node-1-of-host-1.yahoo.com", "node-1-of-host-10.yahoo.com"), hostnames); + assertEquals(Set.of("node-1-of-host-1.yahoo.com", "node-1-of-host-10.yahoo.com"), fixture.nodes().hostnames()); for (int i = 0; i < 1000; i++) { - tester.clock().advance(Duration.ofSeconds(10)); - fetcher.fetchMetrics(application1).whenComplete((r, e) -> tester.nodeMetricsDb().addNodeMetrics(r.nodeMetrics())); - tester.clock().advance(Duration.ofSeconds(10)); - tester.nodeMetricsDb().gc(); + fixture.tester().clock().advance(Duration.ofSeconds(10)); + fetcher.fetchMetrics(fixture.applicationId()).whenComplete((r, e) -> fixture.tester().nodeMetricsDb().addNodeMetrics(r.nodeMetrics())); + fixture.tester().clock().advance(Duration.ofSeconds(10)); + fixture.tester().nodeMetricsDb().gc(); } - ClusterResources min = new ClusterResources(2, 1, nodes); - ClusterResources max = new ClusterResources(2, 1, nodes); - - Application application = tester.nodeRepository().applications().get(application1).orElse(Application.empty(application1)) - .withCluster(cluster1.id(), false, Capacity.from(min, max)); - try (Mutex lock = tester.nodeRepository().nodes().lock(application1)) { - tester.nodeRepository().applications().put(application, lock); - } - var scaledResources = autoscaler.suggest(application, application.clusters().get(cluster1.id()), - tester.nodeRepository().nodes().list().owner(application1)); + var scaledResources = autoscaler.suggest(fixture.application(), fixture.cluster(), fixture.nodes()); assertTrue(scaledResources.isPresent()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index a2eb5fba9f9..e6873e7118f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -1,19 +1,19 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; -import com.yahoo.config.provision.Cloud; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeResources.DiskSpeed; +import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; +import static com.yahoo.config.provision.NodeResources.DiskSpeed.slow; import com.yahoo.config.provision.NodeResources.StorageType; +import static com.yahoo.config.provision.NodeResources.StorageType.remote; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; -import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; @@ -22,8 +22,6 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import org.junit.Test; import java.time.Duration; -import java.util.ArrayList; -import java.util.List; import java.util.Optional; import static org.junit.Assert.assertEquals; @@ -41,11 +39,11 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(1)); assertTrue("No measurements -> No change", fixture.autoscale().isEmpty()); - fixture.applyCpuLoad(0.7f, 59); + fixture.loader().applyCpuLoad(0.7f, 59); assertTrue("Too few measurements -> No change", fixture.autoscale().isEmpty()); fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.applyCpuLoad(0.7f, 120); + fixture.loader().applyCpuLoad(0.7f, 120); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high", 9, 1, 2.8, 5.0, 50.0, fixture.autoscale()); @@ -56,14 +54,14 @@ public class AutoscalingTest { fixture.deactivateRetired(Capacity.from(scaledResources)); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyCpuLoad(0.8f, 3); + fixture.loader().applyCpuLoad(0.8f, 3); assertTrue("Load change is large, but insufficient measurements for new config -> No change", fixture.autoscale().isEmpty()); - fixture.applyCpuLoad(0.19f, 100); + fixture.loader().applyCpuLoad(0.19f, 100); assertEquals("Load change is small -> No change", Optional.empty(), fixture.autoscale().target()); - fixture.applyCpuLoad(0.1f, 120); + fixture.loader().applyCpuLoad(0.1f, 120); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", 9, 1, 1.0, 5.0, 50.0, fixture.autoscale()); @@ -74,7 +72,7 @@ public class AutoscalingTest { public void test_autoscaling_up_is_fast_TODO() { var fixture = AutoscalingTester.fixture().build(); fixture.tester().clock().advance(Duration.ofDays(1)); // TODO: Remove the need for this - fixture.applyLoad(1.0, 1.0, 1.0, 120); // TODO: Make this low + fixture.loader().applyLoad(1.0, 1.0, 1.0, 120); // TODO: Make this low fixture.tester().assertResources("Scaling up since resource usage is too high", 10, 1, 9.4, 8.5, 92.6, fixture.autoscale()); @@ -84,12 +82,12 @@ public class AutoscalingTest { @Test public void test_autoscaling_single_container_group() { var fixture = AutoscalingTester.fixture().clusterType(ClusterSpec.Type.container).build(); - fixture.applyCpuLoad(0.25f, 120); + fixture.loader().applyCpuLoad(0.25f, 120); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since cpu usage is too high", 5, 1, 3.8, 8.0, 50.5, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); - fixture.applyCpuLoad(0.1f, 120); + fixture.loader().applyCpuLoad(0.1f, 120); fixture.tester().assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 6.4, 25.5, fixture.autoscale()); @@ -97,55 +95,55 @@ public class AutoscalingTest { @Test public void autoscaling_handles_disk_setting_changes() { - var resources = new NodeResources(3, 100, 100, 1, NodeResources.DiskSpeed.slow); + var resources = new NodeResources(3, 100, 100, 1, slow); var fixture = AutoscalingTester.fixture() .hostResources(resources) .initialResources(Optional.of(new ClusterResources(5, 1, resources))) .capacity(Capacity.from(new ClusterResources(5, 1, resources))) .build(); - assertTrue(fixture.tester().nodeRepository().nodes().list().owner(fixture.application).stream() - .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow)); + assertTrue(fixture.tester().nodeRepository().nodes().list().owner(fixture.applicationId).stream() + .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == slow)); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyCpuLoad(0.25, 120); + fixture.loader().applyCpuLoad(0.25, 120); // Changing min and max from slow to any ClusterResources min = new ClusterResources( 2, 1, - new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.any)); + new NodeResources(1, 1, 1, 1, DiskSpeed.any)); ClusterResources max = new ClusterResources(20, 1, - new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any)); + new NodeResources(100, 1000, 1000, 1, DiskSpeed.any)); var capacity = Capacity.from(min, max); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up", 14, 1, 1.4, 30.8, 30.8, fixture.autoscale(capacity)); assertEquals("Disk speed from new capacity is used", - NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); + DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); fixture.deploy(Capacity.from(scaledResources)); assertTrue(fixture.nodes().stream() - .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.any)); + .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == DiskSpeed.any)); } @Test public void autoscaling_target_preserves_any() { NodeResources resources = new NodeResources(1, 10, 10, 1); - var capacity = Capacity.from(new ClusterResources( 2, 1, resources.with(NodeResources.DiskSpeed.any)), - new ClusterResources( 10, 1, resources.with(NodeResources.DiskSpeed.any))); + var capacity = Capacity.from(new ClusterResources( 2, 1, resources.with(DiskSpeed.any)), + new ClusterResources( 10, 1, resources.with(DiskSpeed.any))); var fixture = AutoscalingTester.fixture() .capacity(capacity) .initialResources(Optional.empty()) .build(); // Redeployment without target: Uses current resource numbers with *requested* non-numbers (i.e disk-speed any) - assertTrue(fixture.tester().nodeRepository().applications().get(fixture.application).get().cluster(fixture.cluster.id()).get().targetResources().isEmpty()); + assertTrue(fixture.tester().nodeRepository().applications().get(fixture.applicationId).get().cluster(fixture.clusterSpec.id()).get().targetResources().isEmpty()); fixture.deploy(); - assertEquals(NodeResources.DiskSpeed.any, fixture.nodes().first().get().allocation().get().requestedResources().diskSpeed()); + assertEquals(DiskSpeed.any, fixture.nodes().first().get().allocation().get().requestedResources().diskSpeed()); // Autoscaling: Uses disk-speed any as well fixture.deactivateRetired(capacity); fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.applyCpuLoad(0.8, 120); - assertEquals(NodeResources.DiskSpeed.any, fixture.autoscale(capacity).target().get().nodeResources().diskSpeed()); + fixture.loader().applyCpuLoad(0.8, 120); + assertEquals(DiskSpeed.any, fixture.autoscale(capacity).target().get().nodeResources().diskSpeed()); } @Test @@ -158,7 +156,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)).build(); fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.applyLoad(0.25, 0.95, 0.95, 120); + fixture.loader().applyLoad(0.25, 0.95, 0.95, 120); fixture.tester().assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, fixture.autoscale()); @@ -172,7 +170,7 @@ public class AutoscalingTest { // deploy fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(0.05f, 0.05f, 0.05f, 120); + fixture.loader().applyLoad(0.05f, 0.05f, 0.05f, 120); fixture.tester().assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.4, 13.9, fixture.autoscale()); @@ -188,13 +186,13 @@ public class AutoscalingTest { .build(); NodeResources defaultResources = - new CapacityPolicies(fixture.tester().nodeRepository()).defaultNodeResources(fixture.cluster, fixture.application, false); + new CapacityPolicies(fixture.tester().nodeRepository()).defaultNodeResources(fixture.clusterSpec, fixture.applicationId, false); fixture.tester().assertResources("Min number of nodes and default resources", 2, 1, defaultResources, fixture.nodes().toResources()); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(0.25, 0.95, 0.95, 120); + fixture.loader().applyLoad(0.25, 0.95, 0.95, 120); fixture.tester().assertResources("Scaling up", 5, 1, defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(), @@ -211,7 +209,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyCpuLoad(0.3, 240); + fixture.loader().applyCpuLoad(0.3, 240); fixture.tester().assertResources("Scaling up", 6, 6, 3.8, 8.0, 10.0, fixture.autoscale()); @@ -224,58 +222,72 @@ public class AutoscalingTest { // deploy fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.applyCpuLoad(0.25, 120); + fixture.loader().applyCpuLoad(0.25, 120); assertTrue(fixture.autoscale().isEmpty()); } @Test - public void prefers_remote_disk_when_no_local_match() { - NodeResources resources = new NodeResources(3, 100, 50, 1); - ClusterResources min = new ClusterResources( 2, 1, resources); - ClusterResources max = min; - // AutoscalingTester hardcodes 3Gb memory overhead: - Flavor localFlavor = new Flavor("local", new NodeResources(3, 97, 75, 1, DiskSpeed.fast, StorageType.local)); - Flavor remoteFlavor = new Flavor("remote", new NodeResources(3, 97, 50, 1, DiskSpeed.fast, StorageType.remote)); - - var tester = new AutoscalingTester(new Zone(new Cloud.Builder().dynamicProvisioning(true).build(), - SystemName.defaultSystem(), Environment.prod, RegionName.defaultName()), - List.of(localFlavor, remoteFlavor)); - tester.provisioning().makeReadyNodes(5, localFlavor.name(), NodeType.host, 8); - tester.provisioning().makeReadyNodes(5, remoteFlavor.name(), NodeType.host, 8); - tester.provisioning().activateTenantHosts(); - - ApplicationId application1 = AutoscalingTester.applicationId("application1"); - ClusterSpec cluster1 = AutoscalingTester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + public void container_prefers_remote_disk_when_no_local_match() { + var resources = new ClusterResources( 2, 1, new NodeResources(3, 100, 50, 1)); + var local = new NodeResources(3, 100, 75, 1, fast, StorageType.local); + var remote = new NodeResources(3, 100, 50, 1, fast, StorageType.remote); + var fixture = AutoscalingTester.fixture() + .dynamicProvisioning(true) + .clusterType(ClusterSpec.Type.container) + .hostResources(local, remote) + .capacity(Capacity.from(resources)) + .initialResources(Optional.of(new ClusterResources(3, 1, resources.nodeResources()))) + .build(); - // deploy - tester.deploy(application1, cluster1, 3, 1, min.nodeResources()); - Duration timeAdded = tester.addDiskMeasurements(0.01f, 1f, 120, application1); - tester.clock().advance(timeAdded.negated()); - tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> 10.0); // Query traffic only - Autoscaler.Advice suggestion = tester.suggest(application1, cluster1.id(), min, max); - tester.assertResources("Choosing the remote disk flavor as it has less disk", - 6, 1, 3.0, 100.0, 10.0, - suggestion); + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().applyLoad(0.01, 0.01, 0.01, 120); + Autoscaler.Advice suggestion = fixture.suggest(); + fixture.tester().assertResources("Choosing the remote disk flavor as it has less disk", + 2, 1, 3.0, 100.0, 10.0, + suggestion); assertEquals("Choosing the remote disk flavor as it has less disk", StorageType.remote, suggestion.target().get().nodeResources().storageType()); } @Test + public void content_prefers_local_disk_when_no_local_match() { + var resources = new ClusterResources( 2, 1, new NodeResources(3, 100, 50, 1)); + var local = new NodeResources(3, 100, 75, 1, fast, StorageType.local); + var remote = new NodeResources(3, 100, 50, 1, fast, StorageType.remote); + var fixture = AutoscalingTester.fixture() + .dynamicProvisioning(true) + .clusterType(ClusterSpec.Type.content) + .hostResources(local, remote) + .capacity(Capacity.from(resources)) + .initialResources(Optional.of(new ClusterResources(3, 1, resources.nodeResources()))) + .build(); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().applyLoad(0.01, 0.01, 0.01, 120); + Autoscaler.Advice suggestion = fixture.suggest(); + fixture.tester().assertResources("Always prefers local disk for content", + 2, 1, 3.0, 100.0, 75.0, + suggestion); + assertEquals("Always prefers local disk for content", + StorageType.local, suggestion.target().get().nodeResources().storageType()); + } + + @Test public void suggestions_ignores_limits() { ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); var fixture = AutoscalingTester.fixture().capacity(Capacity.from(min, min)).build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyCpuLoad(1.0, 120); + fixture.loader().applyCpuLoad(1.0, 120); fixture.tester().assertResources("Suggesting above capacity limit", 8, 1, 9.3, 5.7, 57.1, - fixture.tester().suggest(fixture.application, fixture.cluster.id(), min, min)); + fixture.tester().suggest(fixture.applicationId, fixture.clusterSpec.id(), min, min)); } @Test public void not_using_out_of_service_measurements() { var fixture = AutoscalingTester.fixture().build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(0.9, 0.6, 0.7, 1, false, true, 120); + fixture.loader().applyLoad(0.9, 0.6, 0.7, 1, false, true, 120); assertTrue("Not scaling up since nodes were measured while cluster was out of service", fixture.autoscale().isEmpty()); } @@ -284,7 +296,7 @@ public class AutoscalingTest { public void not_using_unstable_measurements() { var fixture = AutoscalingTester.fixture().build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(0.9, 0.6, 0.7, 1, true, false, 120); + fixture.loader().applyLoad(0.9, 0.6, 0.7, 1, true, false, 120); assertTrue("Not scaling up since nodes were measured while cluster was out of service", fixture.autoscale().isEmpty()); } @@ -299,7 +311,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyCpuLoad(0.9, 120); + fixture.loader().applyCpuLoad(0.9, 120); fixture.tester().assertResources("Scaling the number of groups, but nothing requires us to stay with 1 node per group", 10, 5, 7.7, 40.0, 40.0, fixture.autoscale()); @@ -315,9 +327,9 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - Duration timePassed = fixture.addCpuMeasurements(0.25, 120); + Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120); fixture.tester().clock().advance(timePassed.negated()); - fixture.addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 1.0); + fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 1.0); fixture.tester().assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 10, 1, 2.3, 27.8, 27.8, fixture.autoscale()); @@ -334,9 +346,9 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - Duration timePassed = fixture.addCpuMeasurements(0.25, 120); + Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120); fixture.tester().clock().advance(timePassed.negated()); - fixture.addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); + fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); fixture.tester().assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", 6, 1, 1.0, 50.0, 50.0, fixture.autoscale()); @@ -352,7 +364,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(1)); - fixture.applyMemLoad(1.0, 1000); + fixture.loader().applyMemLoad(1.0, 1000); fixture.tester().assertResources("Increase group size to reduce memory load", 8, 2, 6.5, 96.2, 62.5, fixture.autoscale()); @@ -368,7 +380,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyMemLoad(0.02, 120); + fixture.loader().applyMemLoad(0.02, 120); fixture.tester().assertResources("Scaling down", 6, 1, 3.1, 4.0, 100.0, fixture.autoscale()); @@ -377,10 +389,10 @@ public class AutoscalingTest { @Test public void scaling_down_only_after_delay() { var fixture = AutoscalingTester.fixture().build(); - fixture.applyMemLoad(0.02, 120); + fixture.loader().applyMemLoad(0.02, 120); assertTrue("Too soon after initial deployment", fixture.autoscale().target().isEmpty()); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyMemLoad(0.02, 120); + fixture.loader().applyMemLoad(0.02, 120); fixture.tester().assertResources("Scaling down since enough time has passed", 6, 1, 1.2, 4.0, 80.0, fixture.autoscale()); @@ -392,7 +404,7 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture() .resourceCalculator(new OnlySubtractingWhenForecastingCalculator(0)) .build(); - fixture.applyLoad(1.0, 1.0, 0.7, 1000); + fixture.loader().applyLoad(1.0, 1.0, 0.7, 1000); fixture.tester().assertResources("Scaling up", 9, 1, 5.0, 9.6, 72.9, fixture.autoscale()); @@ -402,7 +414,7 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture() .resourceCalculator(new OnlySubtractingWhenForecastingCalculator(3)) .build(); - fixture.applyLoad(1.0, 1.0, 0.7, 1000); + fixture.loader().applyLoad(1.0, 1.0, 0.7, 1000); fixture.tester().assertResources("With 3Gb memory tax, we scale up memory more", 7, 1, 6.4, 15.8, 97.2, fixture.autoscale()); @@ -413,41 +425,30 @@ public class AutoscalingTest { public void test_autoscaling_with_dynamic_provisioning() { ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); - var capacity = Capacity.from(min, max); - List<Flavor> flavors = new ArrayList<>(); - flavors.add(new Flavor("aws-xlarge", new NodeResources(3, 200, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); - flavors.add(new Flavor("aws-large", new NodeResources(3, 150, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); - flavors.add(new Flavor("aws-medium", new NodeResources(3, 100, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); - flavors.add(new Flavor("aws-small", new NodeResources(3, 80, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote))); - AutoscalingTester tester = new AutoscalingTester(new Zone(Cloud.builder() - .dynamicProvisioning(true) - .build(), - SystemName.main, - Environment.prod, RegionName.from("us-east")), - flavors); - - ApplicationId application1 = AutoscalingTester.applicationId("application1"); - ClusterSpec cluster1 = AutoscalingTester.clusterSpec(ClusterSpec.Type.content, "cluster1"); - - // deploy (Why 103 Gb memory? See AutoscalingTester.MockHostResourcesCalculator - tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1)); - - tester.clock().advance(Duration.ofDays(2)); - tester.addMemMeasurements(0.9f, 0.6f, 120, application1); - ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", - 8, 1, 3, 83, 34.3, - tester.autoscale(application1, cluster1, capacity)); - - tester.deploy(application1, cluster1, scaledResources); - tester.deactivateRetired(application1, cluster1, scaledResources); - - tester.clock().advance(Duration.ofDays(2)); - tester.addMemMeasurements(0.3f, 0.6f, 1000, application1); - tester.clock().advance(Duration.ofMinutes(-10 * 5)); - tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - tester.assertResources("Scaling down since resource usage has gone down", - 5, 1, 3, 83, 36.0, - tester.autoscale(application1, cluster1, capacity)); + var fixture = AutoscalingTester.fixture() + .dynamicProvisioning(true) + .hostResources(new NodeResources(3, 200, 100, 1, fast, remote), + new NodeResources(3, 150, 100, 1, fast, remote), + new NodeResources(3, 100, 100, 1, fast, remote), + new NodeResources(3, 80, 100, 1, fast, remote)) + .capacity(Capacity.from(min, max)) + .initialResources(Optional.of(new ClusterResources(5, 1, + new NodeResources(3, 100, 100, 1)))) + .build(); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().applyMemLoad(0.9, 120); + var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high.", + 8, 1, 3, 80, 57.1, + fixture.autoscale()); + fixture.deploy(Capacity.from(scaledResources)); + fixture.deactivateRetired(Capacity.from(scaledResources)); + + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.loader().applyMemLoad(0.3, 1000); + fixture.tester().assertResources("Scaling down since resource usage has gone down", + 5, 1, 3, 80, 100.0, + fixture.autoscale()); } @Test @@ -458,7 +459,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester.clock().advance(Duration.ofDays(1)); - fixture.applyCpuLoad(0.25, 120); + fixture.loader().applyCpuLoad(0.25, 120); // (no read share stored) fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default", @@ -481,9 +482,9 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture().build(); fixture.tester().clock().advance(Duration.ofDays(2)); - Duration timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 0.0); + Duration timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 0.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.25, 200); + fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", 9, 1, 2.1, 5, 50, @@ -491,20 +492,20 @@ public class AutoscalingTest { fixture.setScalingDuration(Duration.ofMinutes(5)); fixture.tester().clock().advance(Duration.ofDays(2)); - timeAdded = fixture.addLoadMeasurements(100, t -> 10.0 + (t < 50 ? t : 100 - t), t -> 0.0); + timeAdded = fixture.loader().addLoadMeasurements(100, t -> 10.0 + (t < 50 ? t : 100 - t), t -> 0.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.25, 200); + fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale down since observed growth is slower than scaling time", 9, 1, 1.8, 5, 50, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(60)); fixture.tester().clock().advance(Duration.ofDays(2)); - timeAdded = fixture.addLoadMeasurements(100, - t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)), - t -> 0.0); + timeAdded = fixture.loader().addLoadMeasurements(100, + t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)), + t -> 0.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.25, 200); + fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since observed growth is faster than scaling time", 9, 1, 2.1, 5, 50, fixture.autoscale()); @@ -514,48 +515,48 @@ public class AutoscalingTest { public void test_autoscaling_considers_query_vs_write_rate() { var fixture = AutoscalingTester.fixture().build(); - fixture.addCpuMeasurements(0.4, 220); + fixture.loader().addCpuMeasurements(0.4, 220); // Why twice the query rate at time = 0? // This makes headroom for queries doubling, which we want to observe the effect of here fixture.tester().clock().advance(Duration.ofDays(2)); - var timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0); + var timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.4, 200); + fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester.assertResources("Query and write load is equal -> scale up somewhat", 9, 1, 2.4, 5, 50, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); - timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0); + timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.4, 200); + fixture.loader().addCpuMeasurements(0.4, 200); // TODO: Ackhually, we scale down here - why? fixture.tester().assertResources("Query load is 4x write load -> scale up more", 9, 1, 2.1, 5, 50, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); - timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); + timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.4, 200); + fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write load is 10x query load -> scale down", 9, 1, 1.1, 5, 50, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); - timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0); + timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.4, 200); + fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Query only -> largest possible", 8, 1, 4.9, 5.7, 57.1, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); - timeAdded = fixture.addLoadMeasurements(100, t -> 0.0, t -> 10.0); + timeAdded = fixture.loader().addLoadMeasurements(100, t -> 0.0, t -> 10.0); fixture.tester.clock().advance(timeAdded.negated()); - fixture.addCpuMeasurements(0.4, 200); + fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write only -> smallest possible", 6, 1, 1.0, 8, 80, fixture.autoscale()); @@ -567,7 +568,7 @@ public class AutoscalingTest { .zone(new Zone(Environment.dev, RegionName.from("us-east"))) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(1.0, 1.0, 1.0, 200); + fixture.loader().applyLoad(1.0, 1.0, 1.0, 200); assertTrue("Not attempting to scale up because policies dictate we'll only get one node", fixture.autoscale().target().isEmpty()); } @@ -588,7 +589,7 @@ public class AutoscalingTest { .zone(new Zone(Environment.dev, RegionName.from("us-east"))) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(1.0, 1.0, 1.0, 200); + fixture.loader().applyLoad(1.0, 1.0, 1.0, 200); fixture.tester().assertResources("We scale even in dev because resources are required", 3, 1, 1.0, 7.7, 83.3, fixture.autoscale()); @@ -607,7 +608,7 @@ public class AutoscalingTest { .zone(new Zone(Environment.dev, RegionName.from("us-east"))) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.applyLoad(1.0, 1.0, 1.0, 200); + fixture.loader().applyLoad(1.0, 1.0, 1.0, 200); fixture.tester().assertResources("We scale even in dev because resources are required", 3, 1, 1.5, 8, 50, fixture.autoscale()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 2a4dbe32ab5..e21c57b3ef5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -1,22 +1,18 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; import com.yahoo.test.ManualClock; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.applications.Application; @@ -29,11 +25,9 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import java.time.Duration; -import java.time.Instant; +import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.function.IntFunction; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -48,29 +42,17 @@ class AutoscalingTester { private final HostResourcesCalculator hostResourcesCalculator; private final CapacityPolicies capacityPolicies; - /** Creates an autoscaling tester with a single host type ready */ - public AutoscalingTester(NodeResources hostResources) { - this(Environment.prod, hostResources); - } - - public AutoscalingTester(Environment environment, NodeResources hostResources) { - this(new Zone(environment, RegionName.from("us-east")), hostResources, null); - } - - public AutoscalingTester(Zone zone, NodeResources hostResources, HostResourcesCalculator resourcesCalculator) { + public AutoscalingTester(Zone zone, HostResourcesCalculator resourcesCalculator, List<NodeResources> hostResources) { this(zone, hostResources, resourcesCalculator, 20); } - private AutoscalingTester(Zone zone, NodeResources hostResources, HostResourcesCalculator resourcesCalculator, int hostCount) { - this(zone, List.of(new Flavor("hostFlavor", hostResources)), resourcesCalculator); - provisioningTester.makeReadyNodes(hostCount, "hostFlavor", NodeType.host, 8); + private AutoscalingTester(Zone zone, List<NodeResources> hostResources, HostResourcesCalculator resourcesCalculator, int hostCount) { + this(zone, toFlavors(hostResources), resourcesCalculator); + for (Flavor flavor : toFlavors(hostResources)) + provisioningTester.makeReadyNodes(hostCount, flavor.name(), NodeType.host, 8); provisioningTester.activateTenantHosts(); } - public AutoscalingTester(Zone zone, List<Flavor> flavors) { - this(zone, flavors, new MockHostResourcesCalculator(zone, 3)); - } - private AutoscalingTester(Zone zone, List<Flavor> flavors, HostResourcesCalculator resourcesCalculator) { provisioningTester = new ProvisioningTester.Builder().zone(zone) .flavors(flavors) @@ -83,6 +65,13 @@ class AutoscalingTester { capacityPolicies = new CapacityPolicies(provisioningTester.nodeRepository()); } + private static List<Flavor> toFlavors(List<NodeResources> resources) { + List<Flavor> flavors = new ArrayList<>(); + for (int i = 0; i < resources.size(); i++) + flavors.add(new Flavor("flavor" + i, resources.get(i))); + return flavors; + } + public static Fixture.Builder fixture() { return new Fixture.Builder(); } public ProvisioningTester provisioning() { return provisioningTester; } @@ -121,10 +110,6 @@ class AutoscalingTester { nodeRepository().nodes().setReady(List.of(host), Agent.system, getClass().getSimpleName()); } - public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) { - deactivateRetired(application, cluster, Capacity.from(resources)); - } - public void deactivateRetired(ApplicationId application, ClusterSpec cluster, Capacity capacity) { try (Mutex lock = nodeRepository().nodes().lock(application)) { for (Node node : nodeRepository().nodes().list(Node.State.active).owner(application)) { @@ -135,145 +120,6 @@ class AutoscalingTester { deploy(application, cluster, capacity); } - public ClusterModel clusterModel(ApplicationId applicationId, ClusterSpec clusterSpec) { - var application = nodeRepository().applications().get(applicationId).get(); - return new ClusterModel(application, - clusterSpec, - application.cluster(clusterSpec.id()).get(), - nodeRepository().nodes().list(Node.State.active).cluster(clusterSpec.id()), - nodeRepository().metricsDb(), - nodeRepository().clock()); - } - - /** - * Adds measurements with the given resource value and ideal values for the other resources, - * scaled to take one node redundancy into account. - * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler - * wanting to see the ideal load with one node missing.) - * - * @param otherResourcesLoad the load factor relative to ideal to use for other resources - * @param count the number of measurements - * @param applicationId the application we're adding measurements for all nodes of - */ - public Duration addCpuMeasurements(float value, float otherResourcesLoad, - int count, ApplicationId applicationId) { - NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); - float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); - Instant initialTime = clock().instant(); - for (int i = 0; i < count; i++) { - clock().advance(Duration.ofSeconds(150)); - for (Node node : nodes) { - Load load = new Load(value, - ClusterModel.idealMemoryLoad * otherResourcesLoad, - ClusterModel.idealContentDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor); - nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), - new NodeMetricSnapshot(clock().instant(), - load, - 0, - true, - true, - 0.0)))); - } - } - return Duration.between(initialTime, clock().instant()); - } - - /** - * Adds measurements with the given resource value and ideal values for the other resources, - * scaled to take one node redundancy into account. - * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler - * wanting to see the ideal load with one node missing.) - * - * @param otherResourcesLoad the load factor relative to ideal to use for other resources - * @param count the number of measurements - * @param applicationId the application we're adding measurements for all nodes of - * @return the duration added to the current time by this - */ - public Duration addDiskMeasurements(float value, float otherResourcesLoad, - int count, ApplicationId applicationId) { - NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); - float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); - Instant initialTime = clock().instant(); - for (int i = 0; i < count; i++) { - clock().advance(Duration.ofSeconds(150)); - for (Node node : nodes) { - Load load = new Load(ClusterModel.idealQueryCpuLoad * otherResourcesLoad, - ClusterModel.idealContentDiskLoad * otherResourcesLoad, - value).multiply(oneExtraNodeFactor); - nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), - new NodeMetricSnapshot(clock().instant(), - load, - 0, - true, - true, - 0.0)))); - } - } - return Duration.between(initialTime, clock().instant()); - } - - /** - * Adds measurements with the given resource value and ideal values for the other resources, - * scaled to take one node redundancy into account. - * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler - * wanting to see the ideal load with one node missing.) - * - * @param otherResourcesLoad the load factor relative to ideal to use for other resources - * @param count the number of measurements - * @param applicationId the application we're adding measurements for all nodes of - */ - public void addMemMeasurements(float value, float otherResourcesLoad, - int count, ApplicationId applicationId) { - NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); - float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); - for (int i = 0; i < count; i++) { - clock().advance(Duration.ofMinutes(1)); - for (Node node : nodes) { - float cpu = (float) 0.2 * otherResourcesLoad * oneExtraNodeFactor; - float memory = value * oneExtraNodeFactor; - float disk = (float) ClusterModel.idealContentDiskLoad * otherResourcesLoad * oneExtraNodeFactor; - Load load = new Load(0.2 * otherResourcesLoad, - value, - ClusterModel.idealContentDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor); - nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), - new NodeMetricSnapshot(clock().instant(), - load, - 0, - true, - true, - 0.0)))); - } - } - } - - public void addMeasurements(float cpu, float memory, float disk, int count, ApplicationId applicationId) { - addMeasurements(cpu, memory, disk, 0, true, true, count, applicationId); - } - - public void addMeasurements(float cpu, float memory, float disk, int generation, boolean inService, boolean stable, - int count, ApplicationId applicationId) { - NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId); - for (int i = 0; i < count; i++) { - clock().advance(Duration.ofMinutes(1)); - for (Node node : nodes) { - nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), - new NodeMetricSnapshot(clock().instant(), - new Load(cpu, memory, disk), - generation, - inService, - stable, - 0.0)))); - } - } - } - - public void storeReadShare(double currentReadShare, double maxReadShare, ApplicationId applicationId) { - Application application = nodeRepository().applications().require(applicationId); - application = application.with(application.status().withCurrentReadShare(currentReadShare) - .withMaxReadShare(maxReadShare)); - nodeRepository().applications().put(application, nodeRepository().nodes().lock(applicationId)); - } - /** Creates a single redeployment event with bogus data except for the given duration */ public void setScalingDuration(ApplicationId applicationId, ClusterSpec.Id clusterId, Duration duration) { Application application = nodeRepository().applications().require(applicationId); @@ -294,47 +140,6 @@ class AutoscalingTester { nodeRepository().applications().put(application, nodeRepository().nodes().lock(applicationId)); } - /** Creates the given number of measurements, spaced 5 minutes between, using the given function */ - public Duration addLoadMeasurements(ApplicationId application, - ClusterSpec.Id cluster, - int measurements, - IntFunction<Double> queryRate, - IntFunction<Double> writeRate) { - Instant initialTime = clock().instant(); - for (int i = 0; i < measurements; i++) { - nodeMetricsDb().addClusterMetrics(application, - Map.of(cluster, new ClusterMetricSnapshot(clock().instant(), - queryRate.apply(i), - writeRate.apply(i)))); - clock().advance(Duration.ofMinutes(5)); - } - return Duration.between(initialTime, clock().instant()); - } - - /** Creates the given number of measurements, spaced 5 minutes between, using the given function */ - public Duration addQueryRateMeasurements(ApplicationId application, - ClusterSpec.Id cluster, - int measurements, - IntFunction<Double> queryRate) { - return addQueryRateMeasurements(application, cluster, measurements, Duration.ofMinutes(5), queryRate); - } - - public Duration addQueryRateMeasurements(ApplicationId application, - ClusterSpec.Id cluster, - int measurements, - Duration samplingInterval, - IntFunction<Double> queryRate) { - Instant initialTime = clock().instant(); - for (int i = 0; i < measurements; i++) { - nodeMetricsDb().addClusterMetrics(application, - Map.of(cluster, new ClusterMetricSnapshot(clock().instant(), - queryRate.apply(i), - 0.0))); - clock().advance(samplingInterval); - } - return Duration.between(initialTime, clock().instant()); - } - public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec cluster, Capacity capacity) { capacity = capacityPolicies.applyOn(capacity, applicationId, capacityPolicies.decideExclusivity(capacity, cluster.isExclusive())); Application application = nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId)) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java index 896897f45c1..2edd797b78a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java @@ -3,18 +3,24 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.Cloud; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import java.time.Duration; +import java.util.Arrays; +import java.util.List; import java.util.Optional; -import java.util.function.IntFunction; +import java.util.stream.Collectors; /** * Fixture for autoscaling tests. @@ -24,105 +30,102 @@ import java.util.function.IntFunction; public class Fixture { final AutoscalingTester tester; - final ApplicationId application; - final ClusterSpec cluster; + final ApplicationId applicationId; + final ClusterSpec clusterSpec; final Capacity capacity; + final Loader loader; public Fixture(Fixture.Builder builder, Optional<ClusterResources> initialResources) { - application = builder.application; - cluster = builder.cluster; + applicationId = builder.application; + clusterSpec = builder.cluster; capacity = builder.capacity; - tester = new AutoscalingTester(builder.zone, builder.hostResources, builder.resourceCalculator); + tester = new AutoscalingTester(builder.zone, builder.resourceCalculator, builder.hostResources); var deployCapacity = initialResources.isPresent() ? Capacity.from(initialResources.get()) : capacity; tester.deploy(builder.application, builder.cluster, deployCapacity); + this.loader = new Loader(this); } - public AutoscalingTester tester() { return tester; } + public AutoscalingTester tester() { return tester; } - /** Autoscale within the deployed capacity of this. */ - public Autoscaler.Advice autoscale() { - return autoscale(capacity); - } + public ApplicationId applicationId() { return applicationId; } - /** Autoscale within the given capacity. */ - public Autoscaler.Advice autoscale(Capacity capacity) { - return tester().autoscale(application, cluster, capacity); + public ClusterSpec.Id clusterId() { return clusterSpec.id(); } + + public Application application() { + return tester().nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId)); } - /** Redeploy with the deployed capacity of this. */ - public void deploy() { - deploy(capacity); + public Cluster cluster() { + return application().cluster(clusterId()).get(); } - /** Redeploy with the given capacity. */ - public void deploy(Capacity capacity) { - tester().deploy(application, cluster, capacity); + public ClusterModel clusterModel() { + return new ClusterModel(application(), + clusterSpec, + cluster(), + nodes(), + tester.nodeRepository().metricsDb(), + tester.nodeRepository().clock()); } /** Returns the nodes allocated to the fixture application cluster */ public NodeList nodes() { - return tester().nodeRepository().nodes().list().owner(application).cluster(cluster.id()); + return tester().nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterSpec.id()); } - public void deactivateRetired(Capacity capacity) { - tester().deactivateRetired(application, cluster, capacity); - } + public Loader loader() { return loader; } - public void setScalingDuration(Duration duration) { - tester().setScalingDuration(application, cluster.id(), duration); + /** Autoscale within the deployed capacity of this. */ + public Autoscaler.Advice autoscale() { + return autoscale(capacity); } - public Duration addCpuMeasurements(double cpuLoad, int measurements) { - return tester().addCpuMeasurements((float)cpuLoad, 1.0f, measurements, application); + /** Autoscale within the given capacity. */ + public Autoscaler.Advice autoscale(Capacity capacity) { + return tester().autoscale(applicationId, clusterSpec, capacity); } - public Duration addLoadMeasurements(int measurements, IntFunction<Double> queryRate, IntFunction<Double> writeRate) { - return tester().addLoadMeasurements(application, cluster.id(), measurements, queryRate, writeRate); + /** Compute an autoscaling suggestion for this. */ + public Autoscaler.Advice suggest() { + return tester().suggest(applicationId, clusterSpec.id(), capacity.minResources(), capacity.maxResources()); } - public void applyCpuLoad(double cpuLoad, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements - tester().addCpuMeasurements((float)cpuLoad, 1.0f, measurements, application); - tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + /** Redeploy with the deployed capacity of this. */ + public void deploy() { + deploy(capacity); } - public void applyMemLoad(double memLoad, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements - tester().addMemMeasurements((float)memLoad, 1.0f, measurements, application); - tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + /** Redeploy with the given capacity. */ + public void deploy(Capacity capacity) { + tester().deploy(applicationId, clusterSpec, capacity); } - public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements - tester().addMeasurements((float)cpuLoad, (float)memoryLoad, (float)diskLoad, measurements, application); - tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + public void deactivateRetired(Capacity capacity) { + tester().deactivateRetired(applicationId, clusterSpec, capacity); } - public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int generation, boolean inService, boolean stable, int measurements) { - Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements - tester().addMeasurements((float)cpuLoad, (float)memoryLoad, (float)diskLoad, generation, inService, stable, measurements, application); - tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); - tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + public void setScalingDuration(Duration duration) { + tester().setScalingDuration(applicationId, clusterSpec.id(), duration); } public void storeReadShare(double currentReadShare, double maxReadShare) { - tester().storeReadShare(currentReadShare, maxReadShare, application); + var application = application(); + application = application.with(application.status().withCurrentReadShare(currentReadShare) + .withMaxReadShare(maxReadShare)); + tester.nodeRepository().applications().put(application, tester.nodeRepository().nodes().lock(applicationId)); } public static class Builder { - NodeResources hostResources = new NodeResources(100, 100, 100, 1); + ApplicationId application = AutoscalingTester.applicationId("application1"); + ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster1")).vespaVersion("7").build(); + Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); + List<NodeResources> hostResources = List.of(new NodeResources(100, 100, 100, 1)); Optional<ClusterResources> initialResources = Optional.of(new ClusterResources(5, 1, new NodeResources(3, 10, 100, 1))); Capacity capacity = Capacity.from(new ClusterResources(2, 1, new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.any)), new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any))); - ApplicationId application = AutoscalingTester.applicationId("application1"); - ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster1")).vespaVersion("7").build(); - Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); HostResourcesCalculator resourceCalculator = new AutoscalingTester.MockHostResourcesCalculator(zone, 0); public Fixture.Builder zone(Zone zone) { @@ -130,13 +133,27 @@ public class Fixture { return this; } + /** + * Set to true to behave as if hosts are provisioned dynamically, + * and must therefore be allocated completely to one tenant node. + */ + public Fixture. Builder dynamicProvisioning(boolean dynamic) { + this.zone = new Zone(Cloud.builder() + .dynamicProvisioning(dynamic) + .build(), + zone.system(), + zone.environment(), + zone.region()); + return this; + } + public Fixture.Builder clusterType(ClusterSpec.Type type) { cluster = ClusterSpec.request(type, cluster.id()).vespaVersion("7").build(); return this; } - public Fixture.Builder hostResources(NodeResources hostResources) { - this.hostResources = hostResources; + public Fixture.Builder hostResources(NodeResources ... hostResources) { + this.hostResources = Arrays.stream(hostResources).collect(Collectors.toList()); return this; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java new file mode 100644 index 00000000000..db4fe917b53 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java @@ -0,0 +1,158 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; + +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.function.IntFunction; + +/** + * A helper for applying load to an application represented by a fixture, + * + * @author bratseth + */ +public class Loader { + + private final Fixture fixture; + + public Loader(Fixture fixture) { + this.fixture = fixture; + } + + /** + * Adds measurements with the given resource value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + * + * @param count the number of measurements + */ + public Duration addCpuMeasurements(double value, int count) { + NodeList nodes = fixture.nodes(); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + Instant initialTime = fixture.tester().clock().instant(); + for (int i = 0; i < count; i++) { + fixture.tester().clock().advance(Duration.ofSeconds(150)); + for (Node node : nodes) { + Load load = new Load(value, + ClusterModel.idealMemoryLoad, + ClusterModel.idealContentDiskLoad).multiply(oneExtraNodeFactor); + fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), + new NodeMetricSnapshot(fixture.tester().clock().instant(), + load, + 0, + true, + true, + 0.0)))); + } + } + return Duration.between(initialTime, fixture.tester().clock().instant()); + } + + /** Creates the given number of measurements, spaced 5 minutes between, using the given function */ + public Duration addLoadMeasurements(int measurements, IntFunction<Double> queryRate, IntFunction<Double> writeRate) { + Instant initialTime = fixture.tester().clock().instant(); + for (int i = 0; i < measurements; i++) { + fixture.tester().nodeMetricsDb().addClusterMetrics(fixture.applicationId(), + Map.of(fixture.clusterId(), new ClusterMetricSnapshot(fixture.tester().clock().instant(), + queryRate.apply(i), + writeRate.apply(i)))); + fixture.tester().clock().advance(Duration.ofMinutes(5)); + } + return Duration.between(initialTime, fixture.tester().clock().instant()); + } + + public void applyCpuLoad(double cpuLoad, int measurements) { + Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements + addCpuMeasurements((float)cpuLoad, measurements); + fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); + addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + } + + public void applyMemLoad(double memLoad, int measurements) { + Duration samplingInterval = Duration.ofSeconds(150L); // in addMemMeasurements + addMemMeasurements(memLoad, measurements); + fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); + addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + } + + /** + * Adds measurements with the given resource value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + */ + public void addMemMeasurements(double value, int count) { + NodeList nodes = fixture.nodes(); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + for (int i = 0; i < count; i++) { + fixture.tester().clock().advance(Duration.ofMinutes(1)); + for (Node node : nodes) { + Load load = new Load(0.2, + value, + ClusterModel.idealContentDiskLoad).multiply(oneExtraNodeFactor); + fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), + new NodeMetricSnapshot(fixture.tester().clock().instant(), + load, + 0, + true, + true, + 0.0)))); + } + } + } + + public Duration addMeasurements(double cpu, double memory, double disk, int count) { + return addMeasurements(cpu, memory, disk, 0, true, true, count); + } + + public Duration addMeasurements(double cpu, double memory, double disk, int generation, boolean inService, boolean stable, + int count) { + Instant initialTime = fixture.tester().clock().instant(); + for (int i = 0; i < count; i++) { + fixture.tester().clock().advance(Duration.ofMinutes(1)); + for (Node node : fixture.nodes()) { + fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(), + new NodeMetricSnapshot(fixture.tester().clock().instant(), + new Load(cpu, memory, disk), + generation, + inService, + stable, + 0.0)))); + } + } + return Duration.between(initialTime, fixture.tester().clock().instant()); + } + + public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int measurements) { + Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements + addMeasurements(cpuLoad, memoryLoad, diskLoad, measurements); + fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); + addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + } + + public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int generation, boolean inService, boolean stable, int measurements) { + Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements + addMeasurements(cpuLoad, memoryLoad, diskLoad, generation, inService, stable, measurements); + fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements)); + addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + } + + public Duration addQueryRateMeasurements(int measurements, Duration samplingInterval, IntFunction<Double> queryRate) { + Instant initialTime = fixture.tester().clock().instant(); + for (int i = 0; i < measurements; i++) { + fixture.tester().nodeMetricsDb().addClusterMetrics(fixture.applicationId(), + Map.of(fixture.clusterId(), new ClusterMetricSnapshot(fixture.tester().clock().instant(), + queryRate.apply(i), + 0.0))); + fixture.tester().clock().advance(samplingInterval); + } + return Duration.between(initialTime, fixture.tester().clock().instant()); + } + +} diff --git a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java index 6b50ea4b360..2883191cc0a 100644 --- a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java +++ b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java @@ -76,17 +76,18 @@ public class ApplicationMojo extends AbstractMojo { throw new IllegalArgumentException("compile version (" + compileVersion + ") cannot be higher than parent version (" + parentVersion + ")"); } - String metaData = String.format("{\n" + - " \"compileVersion\": \"%s\",\n" + - " \"buildTime\": %d,\n" + - " \"parentVersion\": %s\n" + - "}", + String metaData = String.format(""" + { + "compileVersion": "%s", + "buildTime": %d, + "parentVersion": %s + } + """, compileVersion, System.currentTimeMillis(), parentVersion == null ? null : "\"" + parentVersion + "\""); try { - Files.write(applicationDestination.toPath().resolve("build-meta.json"), - metaData.getBytes(StandardCharsets.UTF_8)); + Files.writeString(applicationDestination.toPath().resolve("build-meta.json"), metaData); } catch (IOException e) { throw new MojoExecutionException("Failed writing compile version and build time.", e); diff --git a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java index 050871043ca..bf7180f6705 100644 --- a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java +++ b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java @@ -3,7 +3,6 @@ package com.yahoo.container.plugin.mojo; import java.util.Comparator; import java.util.Objects; -import static java.lang.Integer.min; import static java.lang.Integer.parseInt; /** |