diff options
77 files changed, 689 insertions, 603 deletions
diff --git a/client/go/script-utils/standalone/start.go b/client/go/script-utils/standalone/start.go index 7b929730f5a..111b1baeccc 100644 --- a/client/go/script-utils/standalone/start.go +++ b/client/go/script-utils/standalone/start.go @@ -40,6 +40,7 @@ func StartStandaloneContainer(extraArgs []string) int { } c := jvm.NewStandaloneContainer(serviceName) jvmOpts := c.JvmOptions() + jvmOpts.AddOption("-DOnnxBundleActivator.skip=true") for _, extra := range extraArgs { jvmOpts.AddOption(extra) } diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/Reindexing.java b/config-model-api/src/main/java/com/yahoo/config/model/api/Reindexing.java index 74b63a56ea6..b1164709bfc 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/Reindexing.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/Reindexing.java @@ -26,6 +26,9 @@ public interface Reindexing { /** The relative speed with which to reindex. */ double speed(); + /** The cause of reindexing for this document type. */ + String cause(); + } Reindexing DISABLED_INSTANCE = new Reindexing() {}; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index b81b3524c67..781675b9a30 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -10,6 +10,7 @@ import com.yahoo.config.model.api.ValidationParameters; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.application.validation.change.CertificateRemovalChangeValidator; import com.yahoo.vespa.model.application.validation.change.ChangeValidator; import com.yahoo.vespa.model.application.validation.change.CloudAccountChangeValidator; import com.yahoo.vespa.model.application.validation.change.ClusterSizeReductionValidator; @@ -121,7 +122,8 @@ public class Validation { new ContainerRestartValidator(), new NodeResourceChangeValidator(), new RedundancyIncreaseValidator(), - new CloudAccountChangeValidator() + new CloudAccountChangeValidator(), + new CertificateRemovalChangeValidator() }; List<ConfigChangeAction> actions = Arrays.stream(validators) .flatMap(v -> v.validate(currentModel, nextModel, overrides, now).stream()) diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/CertificateRemovalChangeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/CertificateRemovalChangeValidator.java index 50fd6572bcc..1df33ab8517 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/CertificateRemovalChangeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/CertificateRemovalChangeValidator.java @@ -17,7 +17,8 @@ public class CertificateRemovalChangeValidator implements ChangeValidator { current.getContainerClusters() .forEach((clusterId, currentCluster) -> { - validateClients(clusterId, currentCluster.getClients(), next.getContainerClusters().get(clusterId).getClients(), overrides, now); + if(next.getContainerClusters().containsKey(clusterId)) + validateClients(clusterId, currentCluster.getClients(), next.getContainerClusters().get(clusterId).getClients(), overrides, now); }); return List.of(); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/admin/ClusterControllerTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/admin/ClusterControllerTestCase.java index eb77187014f..155d81aabf9 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/admin/ClusterControllerTestCase.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/admin/ClusterControllerTestCase.java @@ -544,5 +544,7 @@ public class ClusterControllerTestCase extends DomBuilderTest { @Override public boolean enabled() { return true; } @Override public Instant ready() { return Instant.EPOCH; } @Override public double speed() { return 1; } + @Override public String cause() { return "cause"; } + } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java index 34fe8a2159d..438ac7dc9a8 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java @@ -174,6 +174,7 @@ public class ApplicationCuratorDatabase { private static final String GENERATION = "generation"; private static final String EPOCH_MILLIS = "epochMillis"; private static final String SPEED = "speed"; + private static final String CAUSE = "cause"; private static byte[] toBytes(ApplicationReindexing reindexing) { Cursor root = new Slime().setObject(); @@ -204,6 +205,7 @@ public class ApplicationCuratorDatabase { private static void setStatus(Cursor statusObject, Status status) { statusObject.setLong(EPOCH_MILLIS, status.ready().toEpochMilli()); statusObject.setDouble(SPEED, status.speed()); + statusObject.setString(CAUSE, status.cause()); } private static ApplicationReindexing fromBytes(byte[] data) { @@ -225,7 +227,8 @@ public class ApplicationCuratorDatabase { private static Status getStatus(Inspector statusObject) { return new Status(Instant.ofEpochMilli(statusObject.field(EPOCH_MILLIS).asLong()), - statusObject.field(SPEED).valid() ? statusObject.field(SPEED).asDouble() : 0.2); + statusObject.field(SPEED).valid() ? statusObject.field(SPEED).asDouble() : 0.2, + statusObject.field(CAUSE).asString()); } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationReindexing.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationReindexing.java index 19395b10b2b..f4e1918e6e3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationReindexing.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationReindexing.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.config.server.application; import com.yahoo.config.model.api.Reindexing; +import com.yahoo.vespa.config.server.maintenance.ReindexingMaintainer; import java.time.Instant; import java.time.temporal.ChronoUnit; @@ -37,10 +38,10 @@ public class ApplicationReindexing implements Reindexing { } /** Returns a copy of this with reindexing for the given document type in the given cluster ready at the given instant. */ - public ApplicationReindexing withReady(String cluster, String documentType, Instant readyAt, double speed) { + public ApplicationReindexing withReady(String cluster, String documentType, Instant readyAt, double speed, String cause) { Cluster current = clusters.getOrDefault(cluster, Cluster.empty()); Cluster modified = new Cluster(current.pending, - with(documentType, new Status(readyAt, speed), current.ready)); + with(documentType, new Status(readyAt, speed, cause), current.ready)); return new ApplicationReindexing(enabled, with(cluster, modified, clusters)); } @@ -172,13 +173,17 @@ public class ApplicationReindexing implements Reindexing { private final Instant ready; private final double speed; + private final String cause; - Status(Instant ready, double speed) { + Status(Instant ready, double speed, String cause) { if (speed <= 0 || 10 < speed) throw new IllegalArgumentException("Reindexing speed must be in (0, 10], but was " + speed); this.ready = ready.truncatedTo(ChronoUnit.MILLIS); this.speed = speed; + this.cause = cause.isBlank() ? speed < ReindexingMaintainer.SPEED ? "background reindexing, to account for changes in built-in linguistics components" + : "reindexing due to a schema change" + : cause; } @Override @@ -190,21 +195,26 @@ public class ApplicationReindexing implements Reindexing { } @Override + public String cause() { + return cause; + } + + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Status status = (Status) o; - return Double.compare(status.speed, speed) == 0 && ready.equals(status.ready); + return Double.compare(status.speed, speed) == 0 && ready.equals(status.ready) && cause.equals(status.cause); } @Override public int hashCode() { - return Objects.hash(ready, speed); + return Objects.hash(ready, speed, cause); } @Override public String toString() { - return "ready at " + ready + ", with relative speed " + speed; + return cause + ", ready at " + ready + ", with relative speed " + speed; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java index 8afffd93a29..4d28b58beb3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandler.java @@ -252,6 +252,7 @@ public class ApplicationHandler extends HttpHandler { Set<String> clusters = StringUtilities.split(request.getProperty("clusterId")); Set<String> types = StringUtilities.split(request.getProperty("documentType")); double speed = Double.parseDouble(Objects.requireNonNullElse(request.getProperty("speed"), "1")); + String cause = Objects.requireNonNullElse(request.getProperty("cause"), "reindexing for an unknown reason"); Map<String, Set<String>> reindexed = new TreeMap<>(); Instant now = applicationRepository.clock().instant(); @@ -267,7 +268,7 @@ public class ApplicationHandler extends HttpHandler { String.join(", ", documentTypes.get(cluster))); if ( ! indexedOnly || indexedDocumentTypes.get(cluster).contains(type)) { - reindexing = reindexing.withReady(cluster, type, now, speed); + reindexing = reindexing.withReady(cluster, type, now, speed, cause); reindexed.computeIfAbsent(cluster, __ -> new TreeSet<>()).add(type); } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/response/ReindexingResponse.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/response/ReindexingResponse.java index 1844b204a63..3ad09ea6345 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/response/ReindexingResponse.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/v2/response/ReindexingResponse.java @@ -40,6 +40,7 @@ public class ReindexingResponse extends JSONResponse { private static void setStatus(Cursor object, ApplicationReindexing.Status readyStatus) { object.setLong("readyMillis", readyStatus.ready().toEpochMilli()); object.setDouble("speed", readyStatus.speed()); + object.setString("cause", readyStatus.cause()); } private static void setStatus(Cursor object, ClusterReindexing.Status status) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java index 99c3ecae62a..05f3f2c6483 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java @@ -40,7 +40,8 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { private static final Duration timeout = Duration.ofSeconds(10); /** Relative reindexing speed. */ - static final double SPEED = 1; + public static final double SPEED = 1; + static final String CAUSE = "reindexing due to a schema change"; private final ConfigConvergenceChecker convergence; private final Clock clock; @@ -96,7 +97,7 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { for (var cluster : reindexing.clusters().entrySet()) for (var pending : cluster.getValue().pending().entrySet()) if (pending.getValue() <= oldestGeneration.get()) { - reindexing = reindexing.withReady(cluster.getKey(), pending.getKey(), now, SPEED) + reindexing = reindexing.withReady(cluster.getKey(), pending.getKey(), now, SPEED, CAUSE) .withoutPending(cluster.getKey(), pending.getKey()); } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java index eb66e7abd88..cbdb462c35e 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java @@ -9,6 +9,7 @@ import java.time.Instant; import java.util.Optional; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; /** * @author jonmv @@ -23,8 +24,11 @@ public class ApplicationCuratorDatabaseTest { assertEquals(Optional.empty(), db.readReindexingStatus(id)); ApplicationReindexing reindexing = ApplicationReindexing.empty() - .withPending("one", "a", 10).withReady("two", "b", Instant.ofEpochMilli(2), 0.2) - .withPending("two", "b", 20).withReady("one", "a", Instant.ofEpochMilli(1), 0.2).withReady("two", "c", Instant.ofEpochMilli(3), 0.2) + .withPending("one", "a", 10) + .withReady("two", "b", Instant.ofEpochMilli(2), 0.2, "test reindexing") + .withPending("two", "b", 20) + .withReady("one", "a", Instant.ofEpochMilli(1), 0.2, "test reindexing") + .withReady("two", "c", Instant.ofEpochMilli(3), 0.2, "test reindexing") .enabled(false); db.writeReindexingStatus(id, reindexing); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationReindexingTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationReindexingTest.java index 3a19cee6675..80ac28e9dbc 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationReindexingTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationReindexingTest.java @@ -22,10 +22,10 @@ public class ApplicationReindexingTest { public void test() { ApplicationReindexing reindexing = ApplicationReindexing.empty() .withPending("one", "a", 10) - .withReady("two", "b", Instant.ofEpochMilli(2), 3) + .withReady("two", "b", Instant.ofEpochMilli(2), 3, "test reindexing") .withPending("two", "b", 20) - .withReady("one", "a", Instant.ofEpochMilli(1), 1) - .withReady("two", "a", Instant.ofEpochMilli(3), 2) + .withReady("one", "a", Instant.ofEpochMilli(1), 1, "test reindexing") + .withReady("two", "a", Instant.ofEpochMilli(3), 2, "test reindexing") .withoutPending("one", "a"); assertEquals(Instant.ofEpochMilli(1), @@ -51,14 +51,14 @@ public class ApplicationReindexingTest { assertEquals(Set.of("one", "two"), reindexing.clusters().keySet()); - assertEquals(Map.of("a", new Status(Instant.ofEpochMilli(1), 1)), + assertEquals(Map.of("a", new Status(Instant.ofEpochMilli(1), 1, "test reindexing")), reindexing.clusters().get("one").ready()); assertEquals(Map.of(), reindexing.clusters().get("one").pending()); - assertEquals(Map.of("a", new Status(Instant.ofEpochMilli(3), 2), - "b", new Status(Instant.ofEpochMilli(2), 3)), + assertEquals(Map.of("a", new Status(Instant.ofEpochMilli(3), 2, "test reindexing"), + "b", new Status(Instant.ofEpochMilli(2), 3, "test reindexing")), reindexing.clusters().get("two").ready()); assertEquals(Map.of("b", 20L), diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java index d543820fc68..b24d37f29fd 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java @@ -241,37 +241,48 @@ public class ApplicationHandlerTest { clock.advance(Duration.ofSeconds(1)); reindex(applicationId, "", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar, bax, baz] in 'foo' of application default.default\"}"); - expected = expected.withReady("boo", "bar", clock.instant(), 1).withReady("foo", "bar", clock.instant(), 1).withReady("foo", "baz", clock.instant(), 1).withReady("foo", "bax", clock.instant(), 1); + expected = expected.withReady("boo", "bar", clock.instant(), 1, "reindexing for an unknown reason") + .withReady("foo", "bar", clock.instant(), 1, "reindexing for an unknown reason") + .withReady("foo", "baz", clock.instant(), 1, "reindexing for an unknown reason") + .withReady("foo", "bax", clock.instant(), 1, "reindexing for an unknown reason"); assertEquals(expected, database.readReindexingStatus(applicationId).orElseThrow()); clock.advance(Duration.ofSeconds(1)); - reindex(applicationId, "?indexedOnly=true", "{\"message\":\"Reindexing document types [bar] in 'foo' of application default.default\"}"); - expected = expected.withReady("foo", "bar", clock.instant(), 1); + reindex(applicationId, "?indexedOnly=true&cause=test%20reindexing", "{\"message\":\"Reindexing document types [bar] in 'foo' of application default.default\"}"); + expected = expected.withReady("foo", "bar", clock.instant(), 1, "test reindexing"); assertEquals(expected, database.readReindexingStatus(applicationId).orElseThrow()); clock.advance(Duration.ofSeconds(1)); - expected = expected.withReady("boo", "bar", clock.instant(), 1).withReady("foo", "bar", clock.instant(), 1).withReady("foo", "baz", clock.instant(), 1).withReady("foo", "bax", clock.instant(), 1); - reindex(applicationId, "?clusterId=", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar, bax, baz] in 'foo' of application default.default\"}"); + expected = expected.withReady("boo", "bar", clock.instant(), 1, "reindexing") + .withReady("foo", "bar", clock.instant(), 1, "reindexing") + .withReady("foo", "baz", clock.instant(), 1, "reindexing") + .withReady("foo", "bax", clock.instant(), 1, "reindexing"); + reindex(applicationId, "?clusterId=&cause=reindexing", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar, bax, baz] in 'foo' of application default.default\"}"); assertEquals(expected, database.readReindexingStatus(applicationId).orElseThrow()); clock.advance(Duration.ofSeconds(1)); - expected = expected.withReady("boo", "bar", clock.instant(), 1).withReady("foo", "bar", clock.instant(), 1); - reindex(applicationId, "?documentType=bar", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar] in 'foo' of application default.default\"}"); + expected = expected.withReady("boo", "bar", clock.instant(), 1, "reindexing") + .withReady("foo", "bar", clock.instant(), 1, "reindexing"); + reindex(applicationId, "?documentType=bar&cause=reindexing", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar] in 'foo' of application default.default\"}"); assertEquals(expected, database.readReindexingStatus(applicationId).orElseThrow()); clock.advance(Duration.ofSeconds(1)); - reindex(applicationId, "?clusterId=foo,boo", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar, bax, baz] in 'foo' of application default.default\"}"); - expected = expected.withReady("boo", "bar", clock.instant(), 1).withReady("foo", "bar", clock.instant(), 1).withReady("foo", "baz", clock.instant(), 1).withReady("foo", "bax", clock.instant(), 1); + reindex(applicationId, "?clusterId=foo,boo&cause=reindexing", "{\"message\":\"Reindexing document types [bar] in 'boo', [bar, bax, baz] in 'foo' of application default.default\"}"); + expected = expected.withReady("boo", "bar", clock.instant(), 1, "reindexing") + .withReady("foo", "bar", clock.instant(), 1, "reindexing") + .withReady("foo", "baz", clock.instant(), 1, "reindexing") + .withReady("foo", "bax", clock.instant(), 1, "reindexing"); assertEquals(expected, database.readReindexingStatus(applicationId).orElseThrow()); clock.advance(Duration.ofSeconds(1)); - reindex(applicationId, "?clusterId=foo&documentType=bar,baz&speed=0.1", "{\"message\":\"Reindexing document types [bar, baz] in 'foo' of application default.default\"}"); - expected = expected.withReady("foo", "bar", clock.instant(), 0.1).withReady("foo", "baz", clock.instant(), 0.1); + reindex(applicationId, "?clusterId=foo&documentType=bar,baz&speed=0.1&cause=reindexing", "{\"message\":\"Reindexing document types [bar, baz] in 'foo' of application default.default\"}"); + expected = expected.withReady("foo", "bar", clock.instant(), 0.1, "reindexing") + .withReady("foo", "baz", clock.instant(), 0.1, "reindexing"); assertEquals(expected, database.readReindexingStatus(applicationId).orElseThrow()); @@ -298,7 +309,8 @@ public class ApplicationHandlerTest { " \"ready\": {" + " \"bar\": {" + " \"readyMillis\": " + (now - 1000) + ", " + - " \"speed\": 1.0" + + " \"speed\": 1.0," + + " \"cause\": \"reindexing\"" + " }" + " }" + " }," + @@ -307,15 +319,18 @@ public class ApplicationHandlerTest { " \"ready\": {" + " \"bar\": {" + " \"readyMillis\": " + now + ", " + - " \"speed\": 0.1" + + " \"speed\": 0.1," + + " \"cause\": \"reindexing\"" + " }," + " \"bax\": {" + " \"readyMillis\": " + (now - 1000) + ", " + - " \"speed\": 1.0" + + " \"speed\": 1.0," + + " \"cause\": \"reindexing\"" + " }," + " \"baz\": {" + " \"readyMillis\": " + now + ", " + - " \"speed\": 0.1" + + " \"speed\": 0.1," + + " \"cause\": \"reindexing\"" + " }" + " }" + " }" + @@ -499,7 +514,7 @@ public class ApplicationHandlerTest { public void testReindexingSerialization() throws IOException { Instant now = Instant.ofEpochMilli(123456); ApplicationReindexing applicationReindexing = ApplicationReindexing.empty() - .withPending("foo", "bar", 123L).withReady("moo", "baz", now, 1); + .withPending("foo", "bar", 123L).withReady("moo", "baz", now, 1, "reindexing"); ClusterReindexing clusterReindexing = new ClusterReindexing(Map.of("bax", new Status(now, null, null, null, null), "baz", new Status(now.plusSeconds(1), now.plusSeconds(2), @@ -514,53 +529,56 @@ public class ApplicationHandlerTest { applicationReindexing, Map.of("boo", clusterReindexing, "moo", clusterReindexing))), - "{\n" + - " \"enabled\": true,\n" + - " \"clusters\": {\n" + - " \"boo\": {\n" + - " \"pending\": {},\n" + - " \"ready\": {\n" + - " \"bar\": {},\n" + - " \"bax\": {\n" + - " \"startedMillis\": 123456\n" + - " },\n" + - " \"baz\": {\n" + - " \"startedMillis\": 124456,\n" + - " \"endedMillis\": 125456,\n" + - " \"state\": \"failed\",\n" + - " \"message\": \"message\",\n" + - " \"progress\": 0.1\n" + - " }\n" + - " }\n" + - " },\n" + - " \"foo\": {\n" + - " \"pending\": {\n" + - " \"bar\": 123\n" + - " },\n" + - " \"ready\": {\n" + - " \"bar\": {},\n" + - " \"hax\": {}\n" + - " }\n" + - " },\n" + - " \"moo\": {\n" + - " \"pending\": {},\n" + - " \"ready\": {\n" + - " \"bax\": {\n" + - " \"startedMillis\": 123456\n" + - " },\n" + - " \"baz\": {\n" + - " \"readyMillis\": 123456,\n" + - " \"speed\": 1.0,\n" + - " \"startedMillis\": 124456,\n" + - " \"endedMillis\": 125456,\n" + - " \"state\": \"failed\",\n" + - " \"message\": \"message\",\n" + - " \"progress\": 0.1\n" + - " }\n" + - " }\n" + - " }\n" + - " }\n" + - "}\n"); + """ + { + "enabled": true, + "clusters": { + "boo": { + "pending": {}, + "ready": { + "bar": {}, + "bax": { + "startedMillis": 123456 + }, + "baz": { + "startedMillis": 124456, + "endedMillis": 125456, + "state": "failed", + "message": "message", + "progress": 0.1 + } + } + }, + "foo": { + "pending": { + "bar": 123 + }, + "ready": { + "bar": {}, + "hax": {} + } + }, + "moo": { + "pending": {}, + "ready": { + "bax": { + "startedMillis": 123456 + }, + "baz": { + "readyMillis": 123456, + "speed": 1.0, + "cause": "reindexing", + "startedMillis": 124456, + "endedMillis": 125456, + "state": "failed", + "message": "message", + "progress": 0.1 + } + } + } + } + } + """); } @Test diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainerTest.java index 26edc5d0f71..5a78d81e508 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainerTest.java @@ -8,6 +8,7 @@ import java.time.Instant; import java.util.List; import java.util.Map; +import static com.yahoo.vespa.config.server.maintenance.ReindexingMaintainer.CAUSE; import static com.yahoo.vespa.config.server.maintenance.ReindexingMaintainer.SPEED; import static com.yahoo.vespa.config.server.maintenance.ReindexingMaintainer.withNewReady; import static com.yahoo.vespa.config.server.maintenance.ReindexingMaintainer.withOnlyCurrentData; @@ -22,7 +23,10 @@ public class ReindexingMaintainerTest { public void testReadyComputation() { ApplicationReindexing reindexing = ApplicationReindexing.empty() .withPending("one", "a", 10) - .withPending("two", "b", 20).withReady("one", "a", Instant.ofEpochMilli(3), SPEED).withReady("two", "b", Instant.ofEpochMilli(2), SPEED).withReady("two", "c", Instant.ofEpochMilli(3), SPEED); + .withPending("two", "b", 20) + .withReady("one", "a", Instant.ofEpochMilli(3), SPEED, CAUSE) + .withReady("two", "b", Instant.ofEpochMilli(2), SPEED, CAUSE) + .withReady("two", "c", Instant.ofEpochMilli(3), SPEED, CAUSE); // Nothing happens without convergence. assertEquals(reindexing, @@ -31,13 +35,13 @@ public class ReindexingMaintainerTest { // Status for (one, a) changes, but not (two, b). Instant later = Instant.ofEpochMilli(3 << 10); // Converged, no longer pending. - assertEquals(reindexing.withoutPending("one", "a").withReady("one", "a", later, SPEED), // Converged, now ready. + assertEquals(reindexing.withoutPending("one", "a").withReady("one", "a", later, SPEED, CAUSE), // Converged, now ready. withNewReady(reindexing, () -> 19L, later)); // Converged, no longer pending. // Converged, no Longer pending. - assertEquals(reindexing.withoutPending("one", "a").withReady("one", "a", later, SPEED) - .withoutPending("two", "b").withReady("two", "b", later, SPEED), + assertEquals(reindexing.withoutPending("one", "a").withReady("one", "a", later, SPEED, CAUSE) + .withoutPending("two", "b").withReady("two", "b", later, SPEED, CAUSE), withNewReady(reindexing, () -> 20L, later)); // Verify generation supplier isn't called when no pending document types. @@ -50,7 +54,10 @@ public class ReindexingMaintainerTest { public void testGarbageRemoval() { ApplicationReindexing reindexing = ApplicationReindexing.empty() .withPending("one", "a", 10) - .withPending("two", "b", 20).withReady("one", "a", Instant.ofEpochMilli(3), SPEED).withReady("two", "b", Instant.ofEpochMilli(2), SPEED).withReady("two", "c", Instant.ofEpochMilli(3), SPEED); + .withPending("two", "b", 20) + .withReady("one", "a", Instant.ofEpochMilli(3), SPEED, CAUSE) + .withReady("two", "b", Instant.ofEpochMilli(2), SPEED, CAUSE) + .withReady("two", "c", Instant.ofEpochMilli(3), SPEED, CAUSE); assertEquals(reindexing, withOnlyCurrentData(reindexing, Map.of("one", List.of("a", "b", "c", "d"), @@ -62,10 +69,13 @@ public class ReindexingMaintainerTest { "two", List.of("b", "c")))); assertEquals(ApplicationReindexing.empty() - .withPending("two", "b", 20).withReady("two", "b", Instant.ofEpochMilli(2), SPEED), + .withPending("two", "b", 20).withReady("two", "b", Instant.ofEpochMilli(2), SPEED, CAUSE), withOnlyCurrentData(reindexing, Map.of("two", List.of("a", "b")))); - assertEquals(ApplicationReindexing.empty().withReady("one", "a", Instant.EPOCH, SPEED).without("one", "a").withReady("two", "c", Instant.ofEpochMilli(3), SPEED), + assertEquals(ApplicationReindexing.empty() + .withReady("one", "a", Instant.EPOCH, SPEED, CAUSE) + .without("one", "a") + .withReady("two", "c", Instant.ofEpochMilli(3), SPEED, CAUSE), withOnlyCurrentData(reindexing, Map.of("one", List.of("c"), "two", List.of("c")))); } diff --git a/container-onnxruntime/src/main/java/ai/vespa/onnxruntime/OnnxBundleActivator.java b/container-onnxruntime/src/main/java/ai/vespa/onnxruntime/OnnxBundleActivator.java index 66dbed98073..81eff2be250 100644 --- a/container-onnxruntime/src/main/java/ai/vespa/onnxruntime/OnnxBundleActivator.java +++ b/container-onnxruntime/src/main/java/ai/vespa/onnxruntime/OnnxBundleActivator.java @@ -23,18 +23,24 @@ public class OnnxBundleActivator implements BundleActivator { @Override public void start(BundleContext ctx) { + String skipAll = OnnxBundleActivator.class.getSimpleName() + SKIP_SUFFIX; + if (SKIP_VALUE.equals(System.getProperty(skipAll))) { + log.info("skip loading of native libraries"); + return; + } for (String libName : LIBRARY_NAMES) { String skipProp = SKIP_PREFIX + libName + SKIP_SUFFIX; if (SKIP_VALUE.equals(System.getProperty(skipProp))) { - log.info("already loaded native library "+libName+", skipping"); + log.fine("already loaded native library "+libName+", skipping"); } else { - System.setProperty(skipProp, SKIP_VALUE); - log.info("loading native library: "+libName); + log.fine("loading native library: "+libName); try { System.loadLibrary(libName); - log.fine("loaded native library OK: "+libName); + // this property also signals onnxruntime to skip loading: + System.setProperty(skipProp, SKIP_VALUE); + log.info("loaded native library OK: "+libName); } catch (Exception|UnsatisfiedLinkError e) { - log.warning("Could not load native library '"+libName+"' because: "+e.getMessage()); + log.info("Could not load native library '"+libName+"' because: "+e.getMessage()); } } } @@ -46,8 +52,10 @@ public class OnnxBundleActivator implements BundleActivator { // but this should in theory do the necessary thing. for (String libName : LIBRARY_NAMES) { String skipProp = SKIP_PREFIX + libName + SKIP_SUFFIX; + if (SKIP_VALUE.equals(System.getProperty(skipProp))) { + log.info("will unload native library: "+libName); + } System.clearProperty(skipProp); - log.info("will unload native library: "+libName); } } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ApplicationReindexing.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ApplicationReindexing.java index 305cd4c802e..d1ecae63f79 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ApplicationReindexing.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ApplicationReindexing.java @@ -87,7 +87,7 @@ public class ApplicationReindexing { @Override public String toString() { return "Cluster{" + - ", pending=" + pending + + "pending=" + pending + ", ready=" + ready + '}'; } @@ -104,8 +104,9 @@ public class ApplicationReindexing { private final String message; private final Double progress; private final Double speed; + private final String cause; - public Status(Instant readyAt, Instant startedAt, Instant endedAt, State state, String message, Double progress, Double speed) { + public Status(Instant readyAt, Instant startedAt, Instant endedAt, State state, String message, Double progress, Double speed, String cause) { this.readyAt = readyAt; this.startedAt = startedAt; this.endedAt = endedAt; @@ -113,10 +114,11 @@ public class ApplicationReindexing { this.message = message; this.progress = progress; this.speed = speed; + this.cause = cause; } public Status(Instant readyAt) { - this(readyAt, null, null, null, null, null, 1.0); + this(readyAt, null, null, null, null, null, 1.0, null); } public Optional<Instant> readyAt() { return Optional.ofNullable(readyAt); } @@ -126,18 +128,26 @@ public class ApplicationReindexing { public Optional<String> message() { return Optional.ofNullable(message); } public Optional<Double> progress() { return Optional.ofNullable(progress); } public Optional<Double> speed() { return Optional.ofNullable(speed); } + public Optional<String> cause() { return Optional.ofNullable(cause); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Status status = (Status) o; - return Objects.equals(readyAt, status.readyAt) && Objects.equals(startedAt, status.startedAt) && Objects.equals(endedAt, status.endedAt) && state == status.state && Objects.equals(message, status.message) && Objects.equals(progress, status.progress) && Objects.equals(speed, status.speed); + return Objects.equals(readyAt, status.readyAt) && + Objects.equals(startedAt, status.startedAt) && + Objects.equals(endedAt, status.endedAt) && + state == status.state && + Objects.equals(message, status.message) && + Objects.equals(progress, status.progress) && + Objects.equals(speed, status.speed) && + Objects.equals(cause, status.cause); } @Override public int hashCode() { - return Objects.hash(readyAt, startedAt, endedAt, state, message, progress, speed); + return Objects.hash(readyAt, startedAt, endedAt, state, message, progress, speed, cause); } @Override @@ -150,6 +160,7 @@ public class ApplicationReindexing { ", message='" + message + '\'' + ", progress=" + progress + ", speed=" + speed + + ", cause='" + cause + '\'' + '}'; } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java index 119e6794e6c..cc05bc01d99 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/ConfigServer.java @@ -38,7 +38,7 @@ public interface ConfigServer { PreparedApplication deploy(DeploymentData deployment); - void reindex(DeploymentId deployment, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly, Double speed); + void reindex(DeploymentId deployment, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly, Double speed, String cause); ApplicationReindexing getReindexing(DeploymentId deployment); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 602a53035a6..444a0ea5ac1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -210,13 +210,13 @@ public class ApplicationController { /** * Triggers reindexing for the given document types in the given clusters, for the given application. - * + * <p> * If no clusters are given, reindexing is triggered for the entire application; otherwise * if no documents types are given, reindexing is triggered for all given clusters; otherwise * reindexing is triggered for the cartesian product of the given clusters and document types. */ - public void reindex(ApplicationId id, ZoneId zoneId, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly, Double speed) { - configServer.reindex(new DeploymentId(id, zoneId), clusterNames, documentTypes, indexedOnly, speed); + public void reindex(ApplicationId id, ZoneId zoneId, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly, Double speed, String cause) { + configServer.reindex(new DeploymentId(id, zoneId), clusterNames, documentTypes, indexedOnly, speed, cause); } /** Returns the reindexing status for the given application in the given zone. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 8d983714be4..71e781ed5ed 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -77,7 +77,7 @@ import static java.util.stream.Collectors.toUnmodifiableList; public class DeploymentStatus { private static <T> List<T> union(List<T> first, List<T> second) { - return Stream.concat(first.stream(), second.stream()).distinct().collect(toUnmodifiableList()); + return Stream.concat(first.stream(), second.stream()).distinct().toList(); } private final Application application; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java index 1ee13bbd0c0..400673bfd0c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java @@ -50,7 +50,8 @@ public class ReindexingTriggerer extends ControllerMaintainer { for (Deployment deployment : deployments) if ( inWindowOfOpportunity(now, id, deployment.zone()) && reindexingIsReady(controller().applications().applicationReindexing(id, deployment.zone()), now)) - controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true, speed); + controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true, speed, + "bakground reindexing, to account for changes in built-in linguistics components"); }); return 1.0; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 001c0b8e522..a10f79d1727 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -2229,7 +2229,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { Double speed = request.hasProperty("speed") ? Double.parseDouble(request.getProperty("speed")) : null; boolean indexedOnly = request.getBooleanProperty("indexedOnly"); - controller.applications().reindex(id, zone, clusterNames, documentTypes, indexedOnly, speed); + controller.applications().reindex(id, zone, clusterNames, documentTypes, indexedOnly, speed, "reindexing triggered by " + requireUserPrincipal(request).getName()); return new MessageResponse("Requested reindexing of " + id + " in " + zone + (clusterNames.isEmpty() ? "" : ", on clusters " + String.join(", ", clusterNames)) + (documentTypes.isEmpty() ? "" : ", for types " + String.join(", ", documentTypes)) + @@ -2281,6 +2281,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { status.message().ifPresent(message -> statusObject.setString("message", message)); status.progress().ifPresent(progress -> statusObject.setDouble("progress", progress)); status.speed().ifPresent(speed -> statusObject.setDouble("speed", speed)); + status.cause().ifPresent(cause -> statusObject.setString("cause", cause)); } private static String toString(ApplicationReindexing.State state) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 999cf63abf6..9c529230a7f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -169,6 +169,7 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { instanceObject.setString("application", instance.application().value()); instanceObject.setString("instance", instance.instance().value()); instanceObject.setBool("upgrading", status.application().require(instance.instance()).change().platform().equals(Optional.of(statistics.version()))); + instanceObject.setBool("pinned", status.application().require(instance.instance()).change().isPinned()); DeploymentStatus.StepStatus stepStatus = status.instanceSteps().get(instance.instance()); if (stepStatus != null) { // Instance may not have any steps, i.e. an empty deployment spec has been submitted stepStatus.blockedUntil(Change.of(statistics.version())) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index c76ed8e8c46..21f76ac4303 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -440,7 +440,7 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer } @Override - public void reindex(DeploymentId deployment, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly, Double speed) { } + public void reindex(DeploymentId deployment, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly, Double speed, String cause) { } @Override public ApplicationReindexing getReindexing(DeploymentId deployment) { @@ -453,7 +453,8 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer ApplicationReindexing.State.FAILED, "(#`д´)ノ", 0.1, - 1.0))))); + 1.0, + "test reindexing"))))); } @Override diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggererTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggererTest.java index b0601dcd880..ecb5bf167d0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggererTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggererTest.java @@ -68,7 +68,7 @@ public class ReindexingTriggererTest { Map.of("cluster", new Cluster(Map.of(), Map.of("type", - new Status(then, then, null, null, null, null, 1.0))))); + new Status(then, then, null, null, null, null, 1.0, null))))); assertFalse(reindexingIsReady(reindexing, now), "Should not be ready when reindexing is already running"); @@ -76,7 +76,7 @@ public class ReindexingTriggererTest { Map.of("cluster", new Cluster(Map.of("type", 123L), Map.of("type", - new Status(then, then, now, null, null, null, 1.0))))); + new Status(then, then, now, null, null, null, 1.0, null))))); assertTrue(reindexingIsReady(reindexing, now), "Should be ready when reindexing is no longer running"); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java index 40d96f716ae..f1381ead221 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -630,7 +630,7 @@ public class ApplicationApiTest extends ControllerContainerTest { // GET to get reindexing status tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/environment/prod/region/us-central-1/reindexing", GET) .userIdentity(USER_ID), - "{\"enabled\":true,\"clusters\":[{\"name\":\"cluster\",\"pending\":[{\"type\":\"type\",\"requiredGeneration\":100}],\"ready\":[{\"type\":\"type\",\"readyAtMillis\":345,\"startedAtMillis\":456,\"endedAtMillis\":567,\"state\":\"failed\",\"message\":\"(#`д´)ノ\",\"progress\":0.1,\"speed\":1.0}]}]}"); + "{\"enabled\":true,\"clusters\":[{\"name\":\"cluster\",\"pending\":[{\"type\":\"type\",\"requiredGeneration\":100}],\"ready\":[{\"type\":\"type\",\"readyAtMillis\":345,\"startedAtMillis\":456,\"endedAtMillis\":567,\"state\":\"failed\",\"message\":\"(#`д´)ノ\",\"progress\":0.1,\"speed\":1.0,\"cause\":\"test reindexing\"}]}]}"); // POST to request a service dump tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/environment/prod/region/us-central-1/node/host-tenant1.application1.instance1-prod.us-central-1/service-dump", POST) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json index 864fb628e92..51398daa1d4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json @@ -36,6 +36,7 @@ "application": "application1", "instance": "default", "upgrading": false, + "pinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -77,6 +78,7 @@ "application": "application2", "instance": "i2", "upgrading": false, + "pinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -185,6 +187,7 @@ "application": "application1", "instance": "default", "upgrading": true, + "pinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -265,6 +268,7 @@ "application": "application2", "instance": "i1", "upgrading": false, + "pinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -324,6 +328,7 @@ "application": "application2", "instance": "i2", "upgrading": true, + "pinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ diff --git a/fastos/src/vespa/fastos/thread.cpp b/fastos/src/vespa/fastos/thread.cpp index 7695ce3342e..d172977b222 100644 --- a/fastos/src/vespa/fastos/thread.cpp +++ b/fastos/src/vespa/fastos/thread.cpp @@ -308,7 +308,7 @@ void FastOS_ThreadInterface::Dispatch(FastOS_Runnable *newOwner, void *arg) _owner = newOwner; _startArg = arg; // Set _thread variable before NewThread returns - _owner->_thread = this; + _owner->_thread.store(this, std::memory_order_release); // It is safe to signal after the unlock since _liveCond is still held // so the signalled thread still exists. diff --git a/fastos/src/vespa/fastos/thread.h b/fastos/src/vespa/fastos/thread.h index 95737e9d079..a8cf12825eb 100644 --- a/fastos/src/vespa/fastos/thread.h +++ b/fastos/src/vespa/fastos/thread.h @@ -455,7 +455,7 @@ class FastOS_Runnable { private: friend class FastOS_ThreadInterface; - FastOS_ThreadInterface *_thread; + std::atomic<FastOS_ThreadInterface*> _thread; public: FastOS_Runnable(const FastOS_Runnable&) = delete; @@ -482,9 +482,9 @@ public: */ virtual void Run(FastOS_ThreadInterface *thisThread, void *arguments)=0; - FastOS_ThreadInterface *GetThread() { return _thread; } - const FastOS_ThreadInterface *GetThread() const { return _thread; } - bool HasThread() const { return _thread != nullptr; } + FastOS_ThreadInterface *GetThread() noexcept { return _thread.load(std::memory_order_acquire); } + const FastOS_ThreadInterface *GetThread() const noexcept { return _thread.load(std::memory_order_acquire); } + bool HasThread() const noexcept { return GetThread() != nullptr; } }; #include <vespa/fastos/unix_thread.h> diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 7934215c165..0cf8f3756b6 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -68,13 +68,6 @@ public class Flags { "Takes effect at redeployment (requires restart)", ZONE_ID, APPLICATION_ID); - public static final UnboundBooleanFlag KEEP_STORAGE_NODE_UP = defineFeatureFlag( - "keep-storage-node-up", true, - List.of("hakonhall"), "2022-07-07", "2022-12-07", - "Whether to leave the storage node (with wanted state) UP while the node is permanently down.", - "Takes effect immediately for nodes transitioning to permanently down.", - ZONE_ID, APPLICATION_ID); - public static final UnboundIntFlag MAX_UNCOMMITTED_MEMORY = defineIntFlag( "max-uncommitted-memory", 130000, List.of("geirst, baldersheim"), "2021-10-21", "2023-01-01", diff --git a/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java b/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java index 6d98c3a3283..b24d5f62174 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java @@ -19,37 +19,47 @@ import java.util.OptionalDouble; @JsonInclude(value = JsonInclude.Include.NON_NULL) public class ClusterCapacity { private final int count; - private final double vcpu; - private final double memoryGb; - private final double diskGb; + private final OptionalDouble vcpu; + private final OptionalDouble memoryGb; + private final OptionalDouble diskGb; private final OptionalDouble bandwidthGbps; @JsonCreator public ClusterCapacity(@JsonProperty("count") int count, - @JsonProperty("vcpu") double vcpu, - @JsonProperty("memoryGb") double memoryGb, - @JsonProperty("diskGb") double diskGb, + @JsonProperty("vcpu") Double vcpu, + @JsonProperty("memoryGb") Double memoryGb, + @JsonProperty("diskGb") Double diskGb, @JsonProperty("bandwidthGbps") Double bandwidthGbps) { this.count = (int) requireNonNegative("count", count); - this.vcpu = requireNonNegative("vcpu", vcpu); - this.memoryGb = requireNonNegative("memoryGb", memoryGb); - this.diskGb = requireNonNegative("diskGb", diskGb); + this.vcpu = vcpu == null ? OptionalDouble.empty() : OptionalDouble.of(requireNonNegative("vcpu", vcpu)); + this.memoryGb = memoryGb == null ? OptionalDouble.empty() : OptionalDouble.of(requireNonNegative("memoryGb", memoryGb)); + this.diskGb = diskGb == null ? OptionalDouble.empty() : OptionalDouble.of(requireNonNegative("diskGb", diskGb)); this.bandwidthGbps = bandwidthGbps == null ? OptionalDouble.empty() : OptionalDouble.of(bandwidthGbps); } /** Returns a new ClusterCapacity equal to {@code this}, but with the given count. */ public ClusterCapacity withCount(int count) { - return new ClusterCapacity(count, vcpu, memoryGb, diskGb, bandwidthGbpsOrNull()); + return new ClusterCapacity(count, vcpuOrNull(), memoryGbOrNull(), diskGbOrNull(), bandwidthGbpsOrNull()); } @JsonGetter("count") public int count() { return count; } - @JsonGetter("vcpu") public double vcpu() { return vcpu; } - @JsonGetter("memoryGb") public double memoryGb() { return memoryGb; } - @JsonGetter("diskGb") public double diskGb() { return diskGb; } + @JsonGetter("vcpu") public Double vcpuOrNull() { + return vcpu.isPresent() ? vcpu.getAsDouble() : null; + } + @JsonGetter("memoryGb") public Double memoryGbOrNull() { + return memoryGb.isPresent() ? memoryGb.getAsDouble() : null; + } + @JsonGetter("diskGb") public Double diskGbOrNull() { + return diskGb.isPresent() ? diskGb.getAsDouble() : null; + } @JsonGetter("bandwidthGbps") public Double bandwidthGbpsOrNull() { return bandwidthGbps.isPresent() ? bandwidthGbps.getAsDouble() : null; } + @JsonIgnore public Double vcpu() { return vcpu.orElse(0.0); } + @JsonIgnore public Double memoryGb() { return memoryGb.orElse(0.0); } + @JsonIgnore public Double diskGb() { return diskGb.orElse(0.0); } + @JsonIgnore public double bandwidthGbps() { return bandwidthGbps.orElse(1.0); } @@ -70,9 +80,9 @@ public class ClusterCapacity { if (o == null || getClass() != o.getClass()) return false; ClusterCapacity that = (ClusterCapacity) o; return count == that.count && - Double.compare(that.vcpu, vcpu) == 0 && - Double.compare(that.memoryGb, memoryGb) == 0 && - Double.compare(that.diskGb, diskGb) == 0 && + vcpu.equals(that.vcpu) && + memoryGb.equals(that.memoryGb) && + diskGb.equals(that.diskGb) && bandwidthGbps.equals(that.bandwidthGbps); } @@ -86,4 +96,5 @@ public class ClusterCapacity { throw new IllegalArgumentException("'" + name + "' must be positive, was " + value); return value; } + } diff --git a/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java b/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java index 189e3fd79c9..796cdbf30d1 100644 --- a/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java +++ b/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java @@ -9,6 +9,7 @@ import java.io.IOException; import static org.junit.jupiter.api.Assertions.assertEquals; public class ClusterCapacityTest { + @Test void serialization() throws IOException { ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, null); @@ -38,4 +39,20 @@ public class ClusterCapacityTest { assertEquals(2.3, deserialized.bandwidthGbps(), 0.0001); assertEquals(7, deserialized.count()); } + + @Test + void serializationWithNoNodeResources() throws IOException { + ClusterCapacity clusterCapacity = new ClusterCapacity(7, null, null, null, null); + ObjectMapper mapper = new ObjectMapper(); + String json = mapper.writeValueAsString(clusterCapacity); + assertEquals("{\"count\":7}", json); + + ClusterCapacity deserialized = mapper.readValue(json, ClusterCapacity.class); + assertEquals(0.0, deserialized.vcpu(), 0.0001); + assertEquals(0.0, deserialized.memoryGb(), 0.0001); + assertEquals(0.0, deserialized.diskGb(), 0.0001); + assertEquals(1.0, deserialized.bandwidthGbps(), 0.0001); // 1.0 is used as fallback + assertEquals(7, deserialized.count()); + } + }
\ No newline at end of file diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java index c45d53ae97b..c4c9dd3f591 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepository.java @@ -25,4 +25,8 @@ public interface NodeRepository { void updateNodeAttributes(String hostName, NodeAttributes nodeAttributes); void setNodeState(String hostName, NodeState nodeState); + + default void reboot(String hostname) { + throw new UnsupportedOperationException("Rebooting not supported in " + getClass().getName()); + } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java index 793bae9e2ab..36a4703a415 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java @@ -133,6 +133,14 @@ public class RealNodeRepository implements NodeRepository { response.throwOnError("Failed to set node state"); } + @Override + public void reboot(String hostname) { + String uri = "/nodes/v2/command/reboot?hostname=" + hostname; + StandardConfigServerResponse response = configServerApi.post(uri, Optional.empty(), StandardConfigServerResponse.class); + logger.info(response.message); + response.throwOnError("Failed to reboot " + hostname); + } + private static NodeSpec createNodeSpec(NodeRepositoryNode node) { Objects.requireNonNull(node.type, "Unknown node type"); NodeType nodeType = NodeType.valueOf(node.type); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index db5771cd623..0facc6d37ea 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -31,8 +31,10 @@ public class ClusterModel { static final double idealQueryCpuLoad = 0.8; static final double idealWriteCpuLoad = 0.95; - static final double idealMemoryLoad = 0.65; - static final double idealAdminMemoryLoad = 0.75; + + static final double idealContainerMemoryLoad = 0.8; + static final double idealContentMemoryLoad = 0.65; + static final double idealContainerDiskLoad = 0.95; static final double idealContentDiskLoad = 0.6; @@ -111,7 +113,6 @@ public class ClusterModel { /** Are we in a position to make decisions to scale down at this point? */ private boolean safeToScaleDown() { if (hasScaledIn(scalingDuration().multipliedBy(3))) return false; - if (nodeTimeseries().measurementsPerNode() < 4) return false; if (nodeTimeseries().nodesMeasured() != nodeCount()) return false; return true; } @@ -309,9 +310,9 @@ public class ClusterModel { } private double idealMemoryLoad() { - if (clusterSpec.type() == ClusterSpec.Type.admin) - return idealAdminMemoryLoad; // Not autoscaled, but ideal shown in console - return idealMemoryLoad; + if (clusterSpec.type().isContainer()) return idealContainerMemoryLoad; + if (clusterSpec.type() == ClusterSpec.Type.admin) return idealContainerMemoryLoad; // Not autoscaled, but ideal shown in console + return idealContentMemoryLoad; } private double idealDiskLoad() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 47975c8354a..37583f00547 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -70,8 +70,7 @@ public class CuratorDatabaseClient { private static final Path firmwareCheckPath = root.append("firmwareCheck"); private static final Path archiveUrisPath = root.append("archiveUris"); - // TODO: Explain reasoning behind timeout value (why its it as high as 10 minutes?) - private static final Duration defaultLockTimeout = Duration.ofMinutes(10); + private static final Duration defaultLockTimeout = Duration.ofMinutes(1); private final NodeSerializer nodeSerializer; private final CuratorDatabase db; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index ff72d22bb39..eda677c6e59 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -78,6 +78,25 @@ public class AutoscalingTest { } @Test + public void test_container_scaling_down_exclusive() { + var min = new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1)); + var now = new ClusterResources(8, 1, new NodeResources(4, 8, 50, 0.1)); + var max = new ClusterResources(8, 1, new NodeResources(4, 8, 50, 0.1)); + var fixture = AutoscalingTester.fixture() + .awsProdSetup(false) + .clusterType(ClusterSpec.Type.container) + .initialResources(Optional.of(now)) + .capacity(Capacity.from(min, max)) + .build(); + fixture.tester().setScalingDuration(fixture.applicationId(), fixture.clusterSpec.id(), Duration.ofMinutes(5)); + + fixture.loader().applyLoad(new Load(0.01, 0.38, 0), 5); + fixture.tester().assertResources("Scaling down", + 2, 1, 4, 8, 50, + fixture.autoscale()); + } + + @Test public void initial_deployment_with_host_sharing_flag() { var min = new ClusterResources(7, 1, new NodeResources(2.0, 10.0, 384.0, 0.1)); var max = new ClusterResources(7, 1, new NodeResources(2.4, 32.0, 768.0, 0.1)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 2d05754d96e..5ceb28d3fed 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -178,7 +178,11 @@ public class AutoscalingMaintainerTest { } assertEquals(Cluster.maxScalingEvents, tester.cluster(app1, cluster1).scalingEvents().size()); - assertEquals("The latest rescaling is the last event stored", + + // Complete last event + tester.addMeasurements(0.1f, 0.1f, 0.1f, 20, 1, app1); + tester.maintainer().maintain(); + assertEquals("Last event is completed", tester.clock().instant(), tester.cluster(app1, cluster1).scalingEvents().get(Cluster.maxScalingEvents - 1).completion().get()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java index a1b2fb5d0ad..375517278b2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java @@ -46,7 +46,6 @@ import com.yahoo.vespa.service.duper.ControllerHostApplication; import com.yahoo.vespa.service.duper.InfraApplication; import com.yahoo.vespa.service.duper.TenantHostApplication; import org.junit.Test; - import java.time.Duration; import java.time.Instant; import java.util.Collections; @@ -55,7 +54,6 @@ import java.util.List; import java.util.Optional; import java.util.Set; import java.util.function.Supplier; -import java.util.stream.Collectors; import java.util.stream.Stream; import static com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner.Behaviour; @@ -84,7 +82,7 @@ public class HostCapacityMaintainerTest { @Test public void does_not_deprovision_when_preprovisioning_enabled() { var tester = new DynamicProvisioningTester().addInitialNodes(); - tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1, 3, 2, 1.0)), ClusterCapacity.class); + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1.0, 3.0, 2.0, 1.0)), ClusterCapacity.class); Optional<Node> failedHost = tester.nodeRepository.nodes().node("host2"); assertTrue(failedHost.isPresent()); @@ -97,8 +95,8 @@ public class HostCapacityMaintainerTest { public void provision_deficit_and_deprovision_excess() { var tester = new DynamicProvisioningTester().addInitialNodes(); tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(2, 48, 128, 1000, 10.0), - new ClusterCapacity(1, 16, 24, 100, 1.0)), + List.of(new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0), + new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0)), ClusterCapacity.class); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); @@ -129,7 +127,7 @@ public class HostCapacityMaintainerTest { tester.hostProvisioner.overrideHostFlavor("host4"); tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(2, 1, 30, 20, 3.0)), + List.of(new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0)), ClusterCapacity.class); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); @@ -151,8 +149,8 @@ public class HostCapacityMaintainerTest { // Add a second cluster equal to the first. It should fit on existing host3 and host100. tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(2, 1, 30, 20, 3.0), - new ClusterCapacity(2, 1, 30, 20, 3.0)), + List.of(new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0), + new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0)), ClusterCapacity.class); tester.maintain(); @@ -165,8 +163,8 @@ public class HostCapacityMaintainerTest { // host3 is a 24-64-100-10 while host100 is 48-128-1000-10. tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(2, 1, 30, 20, 3.0), - new ClusterCapacity(2, 24, 64, 100, 1.0)), + List.of(new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0), + new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0)), ClusterCapacity.class); tester.maintain(); @@ -181,7 +179,7 @@ public class HostCapacityMaintainerTest { // If the preprovision capacity is reduced, we should see shared hosts deprovisioned. tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(1, 1, 30, 20, 3.0)), + List.of(new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0)), ClusterCapacity.class); tester.maintain(); @@ -289,7 +287,7 @@ public class HostCapacityMaintainerTest { // Must be able to allocate 2 nodes with "no resource requirement" tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(2, 0, 0, 0, 0.0)), + List.of(new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0)), ClusterCapacity.class); // Next maintenance run does nothing @@ -314,7 +312,7 @@ public class HostCapacityMaintainerTest { // Increasing the capacity provisions additional hosts tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(3, 0, 0, 0, 0.0)), + List.of(new ClusterCapacity(3, 0.0, 0.0, 0.0, 0.0)), ClusterCapacity.class); assertEquals(0, tester.provisionedHostsMatching(sharedHostNodeResources)); assertTrue(tester.nodeRepository.nodes().node("host102").isEmpty()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 508168261df..b43baf444c8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -70,7 +70,7 @@ public class ScalingSuggestionsMaintainerTest { new TestMetric()); maintainer.maintain(); - assertEquals("12 nodes with [vcpu: 6.0, memory: 5.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]", + assertEquals("13 nodes with [vcpu: 5.5, memory: 4.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]", suggestionOf(app1, cluster1, tester).get().resources().toString()); assertEquals("8 nodes with [vcpu: 11.0, memory: 4.4 Gb, disk 11.8 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]", suggestionOf(app2, cluster2, tester).get().resources().toString()); @@ -80,7 +80,7 @@ public class ScalingSuggestionsMaintainerTest { addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, tester.nodeRepository()); maintainer.maintain(); assertEquals("Suggestion stays at the peak value observed", - "12 nodes with [vcpu: 6.0, memory: 5.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]", + "13 nodes with [vcpu: 5.5, memory: 4.5 Gb, disk 10.0 Gb, bandwidth: 0.1 Gbps, architecture: x86_64]", suggestionOf(app1, cluster1, tester).get().resources().toString()); // Utilization is still way down and a week has passed tester.clock().advance(Duration.ofDays(7)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json index 9ae495a7396..6adcb1199eb 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json @@ -75,7 +75,7 @@ "currentCpu": 0.0, "peakCpu": 0.0, "memory" : 0.0, - "idealMemory": 0.65, + "idealMemory": 0.8, "currentMemory": 0.0, "peakMemory": 0.0, "disk" : 0.0, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index 3f23be5e514..a54ca98d4df 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -34,7 +34,6 @@ public class HostedVespaPolicy implements Policy { private final HostedVespaClusterPolicy clusterPolicy; private final ClusterControllerClientFactory clusterControllerClientFactory; private final ApplicationApiFactory applicationApiFactory; - private final BooleanFlag keepStorageNodeUpFlag; public HostedVespaPolicy(HostedVespaClusterPolicy clusterPolicy, ClusterControllerClientFactory clusterControllerClientFactory, @@ -43,7 +42,6 @@ public class HostedVespaPolicy implements Policy { this.clusterPolicy = clusterPolicy; this.clusterControllerClientFactory = clusterControllerClientFactory; this.applicationApiFactory = applicationApiFactory; - this.keepStorageNodeUpFlag = Flags.KEEP_STORAGE_NODE_UP.bindTo(flagSource); } @Override @@ -101,18 +99,11 @@ public class HostedVespaPolicy implements Policy { clusterPolicy.verifyGroupGoingDownPermanentlyIsFine(cluster); } - boolean keepStorageNodeUp = keepStorageNodeUpFlag - .with(FetchVector.Dimension.APPLICATION_ID, applicationApi.applicationId().serializedForm()) - .value(); - // Get permission from the Cluster Controller to remove the content nodes. for (StorageNode storageNode : applicationApi.getStorageNodesInGroupInClusterOrder()) { - if (keepStorageNodeUp) { - storageNode.setStorageNodeState(context.createSubcontextForSingleAppOp(true), ClusterControllerNodeState.DOWN); - storageNode.forceDistributorState(context, ClusterControllerNodeState.DOWN); - } else { - storageNode.setStorageNodeState(context, ClusterControllerNodeState.DOWN); - } + // Consider changing the semantics of setting storage node state to DOWN in cluster controller, to avoid 2 calls. + storageNode.setStorageNodeState(context.createSubcontextForSingleAppOp(true), ClusterControllerNodeState.DOWN); + storageNode.forceDistributorState(context, ClusterControllerNodeState.DOWN); } // Ensure all nodes in the group are marked as permanently down diff --git a/searchcore/src/tests/proton/attribute/attribute_manager/attribute_manager_test.cpp b/searchcore/src/tests/proton/attribute/attribute_manager/attribute_manager_test.cpp index 0e3445d0785..a66785cb567 100644 --- a/searchcore/src/tests/proton/attribute/attribute_manager/attribute_manager_test.cpp +++ b/searchcore/src/tests/proton/attribute/attribute_manager/attribute_manager_test.cpp @@ -865,6 +865,15 @@ TEST_F("require that shrink flushtarget is handed over to new attribute manager" EXPECT_EQUAL(am1->getShrinker("a1"), am3->getShrinker("a1")); } +TEST_F("transient resource usage is zero in steady state", Fixture) +{ + f.addAttribute("a1"); + f.addAttribute("a2"); + auto usage = f._m.get_transient_resource_usage(); + EXPECT_EQUAL(0u, usage.disk()); + EXPECT_EQUAL(0u, usage.memory()); +} + TEST_MAIN() { std::filesystem::remove_all(std::filesystem::path(test_dir)); diff --git a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp index 12477469e04..43717fc724f 100644 --- a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp +++ b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp @@ -749,6 +749,13 @@ TEST_F("require that flush targets can be retrieved", SearchableFixture) EXPECT_TRUE(assertTarget("subdb.summary.shrink", FType::GC, FComponent::DOCUMENT_STORE, *targets[9])); } +TEST_F("transient resource usage is zero in steady state", SearchableFixture) +{ + auto usage = f._subDb.get_transient_resource_usage(); + EXPECT_EQUAL(0u, usage.disk()); + EXPECT_EQUAL(0u, usage.memory()); +} + TEST_F("require that only fast-access attributes are instantiated", FastAccessOnlyFixture) { std::vector<AttributeGuard> attrs; diff --git a/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp b/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp index 399ebf171af..a45fb7591dc 100644 --- a/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/documentmetastore/lid_allocator.h> +#include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/vespalib/util/generationholder.h> @@ -8,7 +9,9 @@ #include <vespa/vespalib/gtest/gtest.h> #include <iostream> +using search::fef::MatchData; using search::queryeval::Blueprint; +using search::queryeval::SearchIterator; using search::queryeval::SimpleResult; using vespalib::GenerationHolder; using vespalib::Timer; @@ -99,19 +102,25 @@ protected: return result; } - SimpleResult get_active_lids_in_search_iterator(uint32_t docid_limit) { + SimpleResult get_active_lids_in_search_iterator(uint32_t docid_limit, bool filter) { auto blueprint = _allocator.createWhiteListBlueprint(); blueprint->setDocIdLimit(docid_limit); - auto iterator = blueprint->createFilterSearch(true, search::queryeval::Blueprint::FilterConstraint::UPPER_BOUND); + std::unique_ptr<SearchIterator> iterator; + MatchData md(MatchData::params()); + if (filter) { + iterator = blueprint->createFilterSearch(true, Blueprint::FilterConstraint::UPPER_BOUND); + } else { + iterator = blueprint->createSearch(md, true); + } SimpleResult res; res.search(*iterator, docid_limit); return res; } - Trinary search_iterator_matches_any(uint32_t docid_limit) { + Trinary filter_search_iterator_matches_any(uint32_t docid_limit) { auto blueprint = _allocator.createWhiteListBlueprint(); blueprint->setDocIdLimit(docid_limit); - auto iterator = blueprint->createFilterSearch(true, search::queryeval::Blueprint::FilterConstraint::UPPER_BOUND); + auto iterator = blueprint->createFilterSearch(true, Blueprint::FilterConstraint::UPPER_BOUND); return iterator->matches_any(); } @@ -146,16 +155,18 @@ TEST_F(LidAllocatorTest, active_lids_are_available_in_search_iterator) { register_lids({ 1, 2, 3, 4 }); activate_lids({ 1, 2, 4 }, true); - EXPECT_EQ(Trinary::Undefined, search_iterator_matches_any(5)); - EXPECT_EQ(SimpleResult({1, 2, 4}), get_active_lids_in_search_iterator(5)); + EXPECT_EQ(Trinary::Undefined, filter_search_iterator_matches_any(5)); + EXPECT_EQ(SimpleResult({1, 2, 4}), get_active_lids_in_search_iterator(5, true)); + EXPECT_EQ(SimpleResult({1, 2, 4}), get_active_lids_in_search_iterator(5, false)); } -TEST_F(LidAllocatorTest, search_iterator_matches_all_when_all_lids_are_active) +TEST_F(LidAllocatorTest, filter_search_iterator_matches_all_when_all_lids_are_active) { register_lids({ 1, 2, 3, 4 }); activate_lids({ 1, 2, 3, 4 }, true); - EXPECT_EQ(Trinary::True, search_iterator_matches_any(5)); - EXPECT_EQ(SimpleResult({1, 2, 3, 4}), get_active_lids_in_search_iterator(5)); + EXPECT_EQ(Trinary::True, filter_search_iterator_matches_any(6)); + EXPECT_EQ(SimpleResult({1, 2, 3, 4, 5}), get_active_lids_in_search_iterator(6, true)); + EXPECT_EQ(SimpleResult({1, 2, 3, 4}), get_active_lids_in_search_iterator(6, false)); } class LidAllocatorPerformanceTest : public LidAllocatorTest, diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.cpp index 939ae196de8..c03ae3b2f1f 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.cpp @@ -650,4 +650,18 @@ AttributeManager::readable_attribute_vector(const string& name) const return _importedAttributes->get(name); } +TransientResourceUsage +AttributeManager::get_transient_resource_usage() const +{ + // Transient disk usage is measured as the total disk usage of all attribute snapshots + // that are NOT the valid best one. + // Transient memory usage is zero. + TransientResourceUsage result; + for (const auto& elem : _flushables) { + auto usage = elem.second.getFlusher()->get_transient_resource_usage(); + result.merge(usage); + } + return result; +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.h b/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.h index b74e7e72a0e..65729767dbb 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/attributemanager.h @@ -186,6 +186,8 @@ public: const ImportedAttributesRepo *getImportedAttributes() const override { return _importedAttributes.get(); } std::shared_ptr<search::attribute::ReadableAttributeVector> readable_attribute_vector(const string& name) const override; + + TransientResourceUsage get_transient_resource_usage() const override; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/attribute/filter_attribute_manager.h b/searchcore/src/vespa/searchcore/proton/attribute/filter_attribute_manager.h index 1a0fdcb32aa..de35ab7394f 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/filter_attribute_manager.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/filter_attribute_manager.h @@ -58,6 +58,8 @@ public: std::shared_ptr<search::attribute::ReadableAttributeVector> readable_attribute_vector(const string& name) const override; void asyncForAttribute(const vespalib::string &name, std::unique_ptr<IAttributeFunctor> func) const override; + + TransientResourceUsage get_transient_resource_usage() const override { return {}; } }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp index f1b7eac3712..8101b29d98c 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp @@ -178,6 +178,12 @@ FlushableAttribute::FlushableAttribute(AttributeVectorSP attr, FlushableAttribute::~FlushableAttribute() = default; +TransientResourceUsage +FlushableAttribute::get_transient_resource_usage() const +{ + return _attrDir->get_transient_resource_usage(); +} + IFlushTarget::SerialNum FlushableAttribute::getFlushedSerialNum() const { diff --git a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h index 39d79372f25..e25422792fc 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h @@ -2,10 +2,9 @@ #pragma once +#include <vespa/searchcore/proton/common/hw_info.h> #include <vespa/searchcorespi/flush/iflushtarget.h> #include <vespa/searchlib/common/tunefileinfo.h> -#include <vespa/searchcore/proton/common/hw_info.h> - namespace search { class AttributeVector; } @@ -14,8 +13,8 @@ namespace vespalib { class ISequencedTaskExecutor; } namespace proton { - class AttributeDirectory; +class TransientResourceUsage; /** * Implementation of IFlushTarget interface for attribute vectors. @@ -59,11 +58,12 @@ public: vespalib::ISequencedTaskExecutor &attributeFieldWriter, const HwInfo &hwInfo); - virtual - ~FlushableAttribute(); + virtual ~FlushableAttribute(); void setCleanUpAfterFlush(bool cleanUp) { _cleanUpAfterFlush = cleanUp; } + TransientResourceUsage get_transient_resource_usage() const; + // Implements IFlushTarget virtual MemoryGain getApproxMemoryGain() const override; virtual DiskGain getApproxDiskGain() const override; diff --git a/searchcore/src/vespa/searchcore/proton/attribute/i_attribute_manager.h b/searchcore/src/vespa/searchcore/proton/attribute/i_attribute_manager.h index 4b6b8dc687c..ce163827d42 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/i_attribute_manager.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/i_attribute_manager.h @@ -6,6 +6,7 @@ #include "exclusive_attribute_read_accessor.h" #include "i_attribute_factory.h" #include <vespa/searchcommon/attribute/i_attribute_functor.h> +#include <vespa/searchcore/proton/common/i_transient_resource_usage_provider.h> #include <vespa/searchcorespi/flush/iflushtarget.h> #include <vespa/searchlib/attribute/iattributemanager.h> #include <vespa/searchlib/common/serialnum.h> @@ -105,6 +106,8 @@ struct IAttributeManager : public search::IAttributeManager virtual void setImportedAttributes(std::unique_ptr<ImportedAttributesRepo> attributes) = 0; virtual const ImportedAttributesRepo *getImportedAttributes() const = 0; + + virtual TransientResourceUsage get_transient_resource_usage() const = 0; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.cpp index 609ee585a6c..a712035e9af 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.cpp @@ -166,9 +166,13 @@ DocumentMetaStoreFlushTarget(const DocumentMetaStore::SP dms, ITlsSyncer &tlsSyn _lastStats.setPathElementsToLog(8); } - DocumentMetaStoreFlushTarget::~DocumentMetaStoreFlushTarget() = default; +TransientResourceUsage +DocumentMetaStoreFlushTarget::get_transient_resource_usage() const +{ + return _dmsDir->get_transient_resource_usage(); +} IFlushTarget::SerialNum DocumentMetaStoreFlushTarget::getFlushedSerialNum() const diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.h index 17072d28515..ef9f9299791 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastoreflushtarget.h @@ -10,10 +10,11 @@ namespace search::common { class FileHeaderContext; } namespace proton { -class ITlsSyncer; -class AttributeDiskLayout; class AttributeDirectory; +class AttributeDiskLayout; class DocumentMetaStore; +class ITlsSyncer; +class TransientResourceUsage; /** * Implementation of IFlushTarget interface for document meta store. @@ -54,6 +55,8 @@ public: void setCleanUpAfterFlush(bool cleanUp) { _cleanUpAfterFlush = cleanUp; } + TransientResourceUsage get_transient_resource_usage() const; + MemoryGain getApproxMemoryGain() const override; DiskGain getApproxDiskGain() const override; Time getLastFlushTime() const override; diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp index 1d02ed4f063..5efc639064e 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp @@ -188,12 +188,20 @@ private: mutable std::mutex _lock; mutable std::vector<search::fef::TermFieldMatchData *> _matchDataVector; + std::unique_ptr<SearchIterator> create_search_helper(bool strict) const { + auto tfmd = new search::fef::TermFieldMatchData; + { + std::lock_guard<std::mutex> lock(_lock); + _matchDataVector.push_back(tfmd); + } + return search::BitVectorIterator::create(&_activeLids, get_docid_limit(), *tfmd, strict); + } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override { assert(tfmda.size() == 0); (void) tfmda; - return createFilterSearch(strict, FilterConstraint::UPPER_BOUND); + return create_search_helper(strict); } public: WhiteListBlueprint(const search::BitVector &activeLids, bool all_lids_active) @@ -212,12 +220,7 @@ public: if (_all_lids_active) { return std::make_unique<FullSearch>(); } - auto tfmd = new search::fef::TermFieldMatchData; - { - std::lock_guard<std::mutex> lock(_lock); - _matchDataVector.push_back(tfmd); - } - return search::BitVectorIterator::create(&_activeLids, get_docid_limit(), *tfmd, strict); + return create_search_helper(strict); } ~WhiteListBlueprint() { diff --git a/searchcore/src/vespa/searchcore/proton/server/ddbstate.h b/searchcore/src/vespa/searchcore/proton/server/ddbstate.h index 5961ac4da62..20c0799c243 100644 --- a/searchcore/src/vespa/searchcore/proton/server/ddbstate.h +++ b/searchcore/src/vespa/searchcore/proton/server/ddbstate.h @@ -49,7 +49,7 @@ private: static std::vector<vespalib::string> _stateNames; static std::vector<vespalib::string> _configStateNames; - void set_state(State state) noexcept { _state.store(state, std::memory_order_relaxed); } + void set_state(State state) noexcept { _state.store(state, std::memory_order_release); } public: DDBState(); @@ -67,7 +67,7 @@ public: bool enterOnlineState(); void enterShutdownState(); void enterDeadState(); - State getState() const noexcept { return _state.load(std::memory_order_relaxed); } + State getState() const noexcept { return _state.load(std::memory_order_acquire); } static vespalib::string getStateString(State state); bool getClosed() const noexcept { diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp index 27a2db27369..e4d432bf4fd 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp @@ -110,11 +110,12 @@ public: explicit DocumentDBResourceUsageProvider(const DocumentDB& doc_db) noexcept : _doc_db(doc_db) {} + TransientResourceUsage get_transient_resource_usage() const override { - // Transient disk usage is measured as the total disk usage of all current fusion indexes. - // Transient memory usage is measured as the total memory usage of all memory indexes. - auto stats = _doc_db.getReadySubDB()->getSearchableStats(); - return {stats.fusion_size_on_disk(), stats.memoryUsage().allocatedBytes()}; + if (!_doc_db.get_state().get_load_done()) { + return {0, 0}; + } + return _doc_db.getReadySubDB()->get_transient_resource_usage(); } }; diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb.h b/searchcore/src/vespa/searchcore/proton/server/documentdb.h index ef885113079..29a25069367 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb.h @@ -422,6 +422,7 @@ public: ExecutorThreadingService & getWriteService() { return _writeService; } void set_attribute_usage_listener(std::unique_ptr<IAttributeUsageListener> listener); + const DDBState& get_state() const noexcept { return _state; } }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp b/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp index ebe20f24d92..d5475d1f904 100644 --- a/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp @@ -352,4 +352,12 @@ FastAccessDocSubDB::getNewestFlushedSerial() return highest; } +TransientResourceUsage +FastAccessDocSubDB::get_transient_resource_usage() const +{ + auto result = StoreOnlyDocSubDB::get_transient_resource_usage(); + result.merge(getAttributeManager()->get_transient_resource_usage()); + return result; +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.h b/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.h index d29c71ea43c..94fca94c75d 100644 --- a/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.h @@ -121,6 +121,7 @@ public: SerialNum getOldestFlushedSerial() override; SerialNum getNewestFlushedSerial() override; virtual void pruneRemovedFields(SerialNum serialNum) override; + TransientResourceUsage get_transient_resource_usage() const override; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h index f84352a4558..b945c67660b 100644 --- a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h @@ -28,11 +28,7 @@ class DocumentDBConfig; class DocumentSubDbInitializer; class DocumentSubDbInitializerResult; class FeedHandler; -struct IAttributeManager; -struct IBucketStateCalculator; -struct IDocumentDBReferenceResolver; class IDocumentDBReference; -struct IDocumentMetaStoreContext; class IDocumentRetriever; class IFeedView; class IIndexWriter; @@ -40,9 +36,14 @@ class IReplayConfig; class ISearchHandler; class ISummaryAdapter; class ISummaryManager; +class PendingLidTrackerBase; class ReconfigParams; class RemoveDocumentsOperation; -class PendingLidTrackerBase; +class TransientResourceUsage; +struct IAttributeManager; +struct IBucketStateCalculator; +struct IDocumentDBReferenceResolver; +struct IDocumentMetaStoreContext; /** * Interface for a document sub database that handles a subset of the documents that belong to a @@ -123,6 +124,7 @@ public: virtual void tearDownReferences(IDocumentDBReferenceResolver &resolver) = 0; virtual void validateDocStore(FeedHandler &op, SerialNum serialNum) const = 0; virtual PendingLidTrackerBase & getUncommittedLidsTracker() = 0; + virtual TransientResourceUsage get_transient_resource_usage() const = 0; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp index b623b461f6e..043e9cd5d3f 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.cpp @@ -343,4 +343,15 @@ SearchableDocSubDB::clearViews() { Parent::clearViews(); } +TransientResourceUsage +SearchableDocSubDB::get_transient_resource_usage() const +{ + auto result = FastAccessDocSubDB::get_transient_resource_usage(); + // Transient disk usage is measured as the total disk usage of all current fusion indexes. + // Transient memory usage is measured as the total memory usage of all memory indexes. + auto stats = getSearchableStats(); + result.merge({stats.fusion_size_on_disk(), stats.memoryUsage().allocatedBytes()}); + return result; +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h index d264a625e96..c628d9a96b7 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/searchabledocsubdb.h @@ -131,6 +131,7 @@ public: void close() override; std::shared_ptr<IDocumentDBReference> getDocumentDBReference() override; void tearDownReferences(IDocumentDBReferenceResolver &resolver) override; + TransientResourceUsage get_transient_resource_usage() const override; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp index 032307c1157..9419dfa1c90 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp @@ -585,4 +585,10 @@ addTags(vespalib::GenericHeader &header, const vespalib::string &name) const header.putTag(Tag("subDB", _subDB)); } +TransientResourceUsage +StoreOnlyDocSubDB::get_transient_resource_usage() const +{ + return _dmsFlushTarget->get_transient_resource_usage(); +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h index cb1f1ed07bb..f694cc7298f 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h @@ -236,6 +236,7 @@ public: PendingLidTrackerBase & getUncommittedLidsTracker() override { return *_pendingLidsForCommit; } vespalib::datastore::CompactionStrategy computeCompactionStrategy(vespalib::datastore::CompactionStrategy strategy) const; bool isNodeRetired() const { return _nodeRetired; } + TransientResourceUsage get_transient_resource_usage() const override; }; diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h index 6632fbc856a..6c142c97aaf 100644 --- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h +++ b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h @@ -103,6 +103,7 @@ struct DummyDocumentSubDb : public IDocumentSubDB } void tearDownReferences(IDocumentDBReferenceResolver &) override { } + TransientResourceUsage get_transient_resource_usage() const override { return {}; } }; } diff --git a/searchcore/src/vespa/searchcore/proton/test/mock_attribute_manager.h b/searchcore/src/vespa/searchcore/proton/test/mock_attribute_manager.h index 75bb3291dd0..987d60dff01 100644 --- a/searchcore/src/vespa/searchcore/proton/test/mock_attribute_manager.h +++ b/searchcore/src/vespa/searchcore/proton/test/mock_attribute_manager.h @@ -105,6 +105,7 @@ public: std::shared_ptr<search::attribute::ReadableAttributeVector> readable_attribute_vector(const string& name) const override { return _mock.readable_attribute_vector(name); } + TransientResourceUsage get_transient_resource_usage() const override { return {}; } }; } diff --git a/standalone-container/src/main/sh/standalone-container.sh b/standalone-container/src/main/sh/standalone-container.sh index 59a358240b6..f21144dce50 100755 --- a/standalone-container/src/main/sh/standalone-container.sh +++ b/standalone-container/src/main/sh/standalone-container.sh @@ -132,7 +132,7 @@ StartCommand() { # common setup export VESPA_SERVICE_NAME="$service" - if grep -q '"region": "cd-us-west-1"' /etc/vespa/host-admin.json; then + if grep -q 'region.*us-west' /etc/vespa/host-admin.json; then ${VESPA_HOME}/libexec/vespa/script-utils run-standalone-container "${jvm_arguments[@]}" & echo $! > "$pidfile" return diff --git a/vespa-feed-client-api/abi-spec.json b/vespa-feed-client-api/abi-spec.json index 4cb2a8ec67c..137c7f32bfe 100644 --- a/vespa-feed-client-api/abi-spec.json +++ b/vespa-feed-client-api/abi-spec.json @@ -176,6 +176,7 @@ "methods" : [ "public abstract int code()", "public abstract byte[] body()", + "public java.lang.String contentType()", "public static ai.vespa.feed.client.HttpResponse of(int, byte[])" ], "fields" : [ ] diff --git a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/HttpResponse.java b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/HttpResponse.java index 62850fef32d..dece21acd89 100644 --- a/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/HttpResponse.java +++ b/vespa-feed-client-api/src/main/java/ai/vespa/feed/client/HttpResponse.java @@ -6,6 +6,8 @@ public interface HttpResponse { int code(); byte[] body(); + default String contentType() { return "application/json"; } + static HttpResponse of(int code, byte[] body) { return new HttpResponse() { @Override public int code() { return code; } diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java index 6714f2bd590..3ffbaf136f2 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java @@ -188,9 +188,14 @@ class ApacheCluster implements Cluster { } @Override + public String contentType() { + return wrapped.getContentType().getMimeType(); + } + + @Override public String toString() { return "HTTP response with code " + code() + - (body() != null ? " and body '" + new String(body(), UTF_8) + "'" : ""); + (body() != null ? " and body '" + wrapped.getBodyText() + "'" : ""); } } diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java index 80a107fbdc6..8ee48548b31 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java @@ -15,16 +15,23 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import java.io.IOException; +import java.io.UncheckedIOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.time.Duration; import java.util.HashMap; import java.util.Map; import java.util.StringJoiner; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Supplier; +import static ai.vespa.feed.client.OperationParameters.empty; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; @@ -51,6 +58,7 @@ class HttpFeedClient implements FeedClient { this.requestHeaders = new HashMap<>(builder.requestHeaders); this.requestStrategy = requestStrategy; this.speedTest = builder.speedTest; + verifyConnection(builder); } @Override @@ -113,6 +121,50 @@ class HttpFeedClient implements FeedClient { return promise; } + private void verifyConnection(FeedClientBuilderImpl builder) { + if (builder.dryrun) return; + try (Cluster cluster = new ApacheCluster(builder)) { + HttpRequest request = new HttpRequest("POST", + getPath(DocumentId.of("feeder", "handshake", "dummy")) + getQuery(empty(), true), + requestHeaders, + null, + Duration.ofSeconds(10)); + CompletableFuture<HttpResponse> future = new CompletableFuture<>(); + cluster.dispatch(request, future); + HttpResponse response = future.get(20, TimeUnit.SECONDS); + if (response.code() != 200) { + String message; + switch (response.contentType()) { case "application/json": message = parseMessage(request.body()); break; + case "text/plain": message = new String(request.body(), UTF_8); break; + default: message = response.toString(); break; + } + throw new FeedException("server responded non-OK to handshake: " + message); + } + } + catch (IOException e) { + throw new FeedException("failed initiating handshake with server: " + e, e); + } + catch (ExecutionException e) { + throw new FeedException("failed handshake with server: " + e.getCause(), e.getCause()); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new FeedException("interrupted during handshake with server", e); + } + catch (TimeoutException e) { + throw new FeedException("timed out during handshake with server", e); + } + } + + private static String parseMessage(byte[] json) { + try { + return parse(null, json).message; + } + catch (Exception e) { + return new String(json, UTF_8); + } + } + private enum Outcome { success, conditionNotMet, vespaFailure, transportFailure }; static Result.Type toResultType(Outcome outcome) { @@ -123,46 +175,48 @@ class HttpFeedClient implements FeedClient { } } - static Result toResult(HttpRequest request, HttpResponse response, DocumentId documentId) { - Outcome outcome; - switch (response.code()) { - case 200: outcome = Outcome.success; break; - case 412: outcome = Outcome.conditionNotMet; break; - case 502: - case 504: - case 507: outcome = Outcome.vespaFailure; break; - default: outcome = Outcome.transportFailure; + private static class MessageAndTrace { + final String message; + final String trace; + MessageAndTrace(String message, String trace) { + this.message = message; + this.trace = trace; } + } + static MessageAndTrace parse(DocumentId documentId, byte[] json) { String message = null; String trace = null; - try { - JsonParser parser = factory.createParser(response.body()); + try (JsonParser parser = factory.createParser(json)) { if (parser.nextToken() != JsonToken.START_OBJECT) throw new ResultParseException( documentId, "Expected '" + JsonToken.START_OBJECT + "', but found '" + parser.currentToken() + "' in: " + - new String(response.body(), UTF_8)); + new String(json, UTF_8)); String name; while ((name = parser.nextFieldName()) != null) { switch (name) { - case "message": message = parser.nextTextValue(); break; - case "trace": { + case "message": + message = parser.nextTextValue(); + break; + case "trace": if (parser.nextToken() != JsonToken.START_ARRAY) throw new ResultParseException(documentId, "Expected 'trace' to be an array, but got '" + parser.currentToken() + "' in: " + - new String(response.body(), UTF_8)); + new String(json, UTF_8)); int start = (int) parser.getTokenLocation().getByteOffset(); int depth = 1; while (depth > 0) switch (parser.nextToken()) { case START_ARRAY: ++depth; break; - case END_ARRAY: --depth; break; + case END_ARRAY: --depth; break; } int end = (int) parser.getTokenLocation().getByteOffset() + 1; - trace = new String(response.body(), start, end - start, UTF_8); - }; break; - default: parser.nextToken(); + trace = new String(json, start, end - start, UTF_8); + break; + default: + parser.nextToken(); + break; } } @@ -170,22 +224,38 @@ class HttpFeedClient implements FeedClient { throw new ResultParseException( documentId, "Expected '" + JsonToken.END_OBJECT + "', but found '" + parser.currentToken() + "' in: " - + new String(response.body(), UTF_8)); + + new String(json, UTF_8)); } catch (IOException e) { throw new ResultParseException(documentId, e); } + return new MessageAndTrace(message, trace); + } + + static Result toResult(HttpRequest request, HttpResponse response, DocumentId documentId) { + Outcome outcome; + switch (response.code()) { + case 200: outcome = Outcome.success; break; + case 412: outcome = Outcome.conditionNotMet; break; + case 502: + case 504: + case 507: outcome = Outcome.vespaFailure; break; + default: outcome = Outcome.transportFailure; break; + } + + MessageAndTrace mat = parse(documentId, response.body()); + if (outcome == Outcome.transportFailure) // Not a Vespa response, but a failure in the HTTP layer. throw new FeedException( documentId, "Status " + response.code() + " executing '" + request + "': " - + (message == null ? new String(response.body(), UTF_8) : message)); + + (mat.message == null ? new String(response.body(), UTF_8) : mat.message)); if (outcome == Outcome.vespaFailure) - throw new ResultException(documentId, message, trace); + throw new ResultException(documentId, mat.message, mat.trace); - return new ResultImpl(toResultType(outcome), documentId, message, trace); + return new ResultImpl(toResultType(outcome), documentId, mat.message, mat.trace); } static String getPath(DocumentId documentId) { diff --git a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/HttpFeedClientTest.java b/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/HttpFeedClientTest.java index 5353ab92fb6..a0b3049c2a0 100644 --- a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/HttpFeedClientTest.java +++ b/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/HttpFeedClientTest.java @@ -4,6 +4,7 @@ package ai.vespa.feed.client.impl; import ai.vespa.feed.client.DocumentId; import ai.vespa.feed.client.FeedClient; import ai.vespa.feed.client.FeedClientBuilder; +import ai.vespa.feed.client.FeedException; import ai.vespa.feed.client.HttpResponse; import ai.vespa.feed.client.OperationParameters; import ai.vespa.feed.client.OperationStats; @@ -42,7 +43,8 @@ class HttpFeedClientTest { @Override public void await() { throw new UnsupportedOperationException(); } @Override public CompletableFuture<HttpResponse> enqueue(DocumentId documentId, HttpRequest request) { return dispatch.get().apply(documentId, request); } } - FeedClient client = new HttpFeedClient(new FeedClientBuilderImpl(Collections.singletonList(URI.create("https://dummy:123"))), new MockRequestStrategy()); + FeedClient client = new HttpFeedClient(new FeedClientBuilderImpl(Collections.singletonList(URI.create("https://dummy:123"))).setDryrun(true), + new MockRequestStrategy()); // Update is a PUT, and 200 OK is a success. dispatch.set((documentId, request) -> { @@ -54,10 +56,11 @@ class HttpFeedClientTest { assertEquals("json", new String(request.body(), UTF_8)); HttpResponse response = HttpResponse.of(200, - ("{\n" + - " \"pathId\": \"/document/v1/ns/type/docid/0\",\n" + - " \"id\": \"id:ns:type::0\"\n" + - "}").getBytes(UTF_8)); + (""" + { + "pathId": "/document/v1/ns/type/docid/0", + "id": "id:ns:type::0" + }""").getBytes(UTF_8)); return CompletableFuture.completedFuture(response); } catch (Throwable thrown) { @@ -85,26 +88,27 @@ class HttpFeedClientTest { assertNull(request.body()); HttpResponse response = HttpResponse.of(412, - ("{\n" + - " \"pathId\": \"/document/v1/ns/type/docid/0\",\n" + - " \"id\": \"id:ns:type::0\",\n" + - " \"message\": \"Relax, take it easy.\",\n" + - " \"trace\": [\n" + - " {\n" + - " \"message\": \"For there is nothing that we can do.\"\n" + - " },\n" + - " {\n" + - " \"fork\": [\n" + - " {\n" + - " \"message\": \"Relax, take is easy.\"\n" + - " },\n" + - " {\n" + - " \"message\": \"Blame it on me or blame it on you.\"\n" + - " }\n" + - " ]\n" + - " }\n" + - " ]\n" + - "}").getBytes(UTF_8)); + (""" + { + "pathId": "/document/v1/ns/type/docid/0", + "id": "id:ns:type::0", + "message": "Relax, take it easy.", + "trace": [ + { + "message": "For there is nothing that we can do." + }, + { + "fork": [ + { + "message": "Relax, take is easy." + }, + { + "message": "Blame it on me or blame it on you." + } + ] + } + ] + }""").getBytes(UTF_8)); return CompletableFuture.completedFuture(response); } catch (Throwable thrown) { @@ -119,21 +123,22 @@ class HttpFeedClientTest { assertEquals(Result.Type.conditionNotMet, result.type()); assertEquals(id, result.documentId()); assertEquals(Optional.of("Relax, take it easy."), result.resultMessage()); - assertEquals(Optional.of("[\n" + - " {\n" + - " \"message\": \"For there is nothing that we can do.\"\n" + - " },\n" + - " {\n" + - " \"fork\": [\n" + - " {\n" + - " \"message\": \"Relax, take is easy.\"\n" + - " },\n" + - " {\n" + - " \"message\": \"Blame it on me or blame it on you.\"\n" + - " }\n" + - " ]\n" + - " }\n" + - " ]"), result.traceMessage()); + assertEquals(Optional.of(""" + [ + { + "message": "For there is nothing that we can do." + }, + { + "fork": [ + { + "message": "Relax, take is easy." + }, + { + "message": "Blame it on me or blame it on you." + } + ] + } + ]"""), result.traceMessage()); // Put is a POST, and a Vespa error is a ResultException. dispatch.set((documentId, request) -> { @@ -144,12 +149,13 @@ class HttpFeedClientTest { assertEquals("json", new String(request.body(), UTF_8)); HttpResponse response = HttpResponse.of(502, - ("{\n" + - " \"pathId\": \"/document/v1/ns/type/docid/0\",\n" + - " \"id\": \"id:ns:type::0\",\n" + - " \"message\": \"Ooops! ... I did it again.\",\n" + - " \"trace\": [ { \"message\": \"I played with your heart. Got lost in the game.\" } ]\n" + - "}").getBytes(UTF_8)); + (""" + { + "pathId": "/document/v1/ns/type/docid/0", + "id": "id:ns:type::0", + "message": "Ooops! ... I did it again.", + "trace": [ { "message": "I played with your heart. Got lost in the game." } ] + }""").getBytes(UTF_8)); return CompletableFuture.completedFuture(response); } catch (Throwable thrown) { @@ -181,12 +187,13 @@ class HttpFeedClientTest { assertEquals("json", new String(request.body(), UTF_8)); HttpResponse response = HttpResponse.of(500, - ("{\n" + - " \"pathId\": \"/document/v1/ns/type/docid/0\",\n" + - " \"id\": \"id:ns:type::0\",\n" + - " \"message\": \"Alla ska i jorden.\",\n" + - " \"trace\": [ { \"message\": \"Din tid den kom, och senn så for den.\" } ]\n" + - "}").getBytes(UTF_8)); + (""" + { + "pathId": "/document/v1/ns/type/docid/0", + "id": "id:ns:type::0", + "message": "Alla ska i jorden.", + "trace": [ { "message": "Din tid den kom, och senn så for den." } ] + }""").getBytes(UTF_8)); return CompletableFuture.completedFuture(response); } catch (Throwable thrown) { @@ -203,4 +210,11 @@ class HttpFeedClientTest { assertEquals("Status 500 executing 'POST /document/v1/ns/type/docid/0': Alla ska i jorden.", expected.getCause().getMessage()); } + @Test + void testHandshake() { + assertTrue(assertThrows(FeedException.class, + () -> new HttpFeedClient(new FeedClientBuilderImpl(Collections.singletonList(URI.create("https://dummy:123"))), null)) + .getMessage().startsWith("failed handshake with server: java.net.UnknownHostException")); + } + } diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java index 06cdccd17fc..6ad6fe2b1e2 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java @@ -116,6 +116,7 @@ import static com.yahoo.jdisc.http.HttpRequest.Method.GET; import static com.yahoo.jdisc.http.HttpRequest.Method.OPTIONS; import static com.yahoo.jdisc.http.HttpRequest.Method.POST; import static com.yahoo.jdisc.http.HttpRequest.Method.PUT; +import static com.yahoo.vespa.http.server.Headers.CLIENT_VERSION; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.logging.Level.FINE; @@ -449,7 +450,8 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { } private ContentChannel postDocument(HttpRequest request, DocumentPath path, ResponseHandler rawHandler) { - ResponseHandler handler = new MeasuringResponseHandler(rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.PUT, clock.instant()); + ResponseHandler handler = new MeasuringResponseHandler( + request, rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.PUT, clock.instant()); if (getProperty(request, DRY_RUN, booleanParser).orElse(false)) { handleFeedOperation(path, true, handler, new com.yahoo.documentapi.Response(-1)); return ignoredContent; @@ -462,7 +464,7 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { DocumentOperationParameters parameters = parametersFromRequest(request, ROUTE) .withResponseHandler(response -> { outstanding.decrementAndGet(); - updatePutMetrics(response.outcome()); + updatePutMetrics(response.outcome(), latencyOf(request)); handleFeedOperation(path, put.fullyApplied(), handler, response); }); return () -> dispatchOperation(() -> asyncSession.put((DocumentPut)put.operation(), parameters)); @@ -471,7 +473,8 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { } private ContentChannel putDocument(HttpRequest request, DocumentPath path, ResponseHandler rawHandler) { - ResponseHandler handler = new MeasuringResponseHandler(rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.UPDATE, clock.instant()); + ResponseHandler handler = new MeasuringResponseHandler( + request, rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.UPDATE, clock.instant()); if (getProperty(request, DRY_RUN, booleanParser).orElse(false)) { handleFeedOperation(path, true, handler, new com.yahoo.documentapi.Response(-1)); return ignoredContent; @@ -486,7 +489,7 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { DocumentOperationParameters parameters = parametersFromRequest(request, ROUTE) .withResponseHandler(response -> { outstanding.decrementAndGet(); - updateUpdateMetrics(response.outcome(), update.getCreateIfNonExistent()); + updateUpdateMetrics(response.outcome(), latencyOf(request), update.getCreateIfNonExistent()); handleFeedOperation(path, parsed.fullyApplied(), handler, response); }); return () -> dispatchOperation(() -> asyncSession.update(update, parameters)); @@ -495,7 +498,8 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { } private ContentChannel deleteDocument(HttpRequest request, DocumentPath path, ResponseHandler rawHandler) { - ResponseHandler handler = new MeasuringResponseHandler(rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.REMOVE, clock.instant()); + ResponseHandler handler = new MeasuringResponseHandler( + request, rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.REMOVE, clock.instant()); if (getProperty(request, DRY_RUN, booleanParser).orElse(false)) { handleFeedOperation(path, true, handler, new com.yahoo.documentapi.Response(-1)); return ignoredContent; @@ -507,7 +511,7 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { DocumentOperationParameters parameters = parametersFromRequest(request, ROUTE) .withResponseHandler(response -> { outstanding.decrementAndGet(); - updateRemoveMetrics(response.outcome()); + updateRemoveMetrics(response.outcome(), latencyOf(request)); handleFeedOperation(path, true, handler, response); }); return () -> dispatchOperation(() -> asyncSession.remove(remove, parameters)); @@ -1090,33 +1094,50 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { handle(path, null, handler, response, (document, jsonResponse) -> jsonResponse.commit(Response.Status.OK, fullyApplied)); } - private void updatePutMetrics(Outcome outcome) { + private static double latencyOf(HttpRequest r) { return (System.nanoTime() - r.relativeCreatedAtNanoTime()) / 1e+9d; } + + private void updatePutMetrics(Outcome outcome, double latency) { + incrementMetricNumOperations(); incrementMetricNumPuts(); sampleLatency(latency); switch (outcome) { - case SUCCESS -> metric.add(MetricNames.SUCCEEDED, 1, null); - case CONDITION_FAILED -> metric.add(MetricNames.CONDITION_NOT_MET, 1, null); - default -> metric.add(MetricNames.FAILED, 1, null); + case SUCCESS -> incrementMetricSucceeded(); + case CONDITION_FAILED -> { incrementMetricSucceeded(); incrementMetricConditionNotMet(); } + default -> incrementMetricFailed(); } } - private void updateUpdateMetrics(Outcome outcome, boolean create) { + private void updateUpdateMetrics(Outcome outcome, double latency, boolean create) { if (create && outcome == Outcome.NOT_FOUND) outcome = Outcome.SUCCESS; // >_< + incrementMetricNumOperations(); incrementMetricNumUpdates(); sampleLatency(latency); switch (outcome) { - case SUCCESS -> metric.add(MetricNames.SUCCEEDED, 1, null); - case NOT_FOUND -> metric.add(MetricNames.NOT_FOUND, 1, null); - case CONDITION_FAILED -> metric.add(MetricNames.CONDITION_NOT_MET, 1, null); - default -> metric.add(MetricNames.FAILED, 1, null); + case SUCCESS -> incrementMetricSucceeded(); + case NOT_FOUND -> { incrementMetricSucceeded(); incrementMetricNotFound(); } + case CONDITION_FAILED -> { incrementMetricSucceeded(); incrementMetricConditionNotMet(); } + default -> incrementMetricFailed(); } } - private void updateRemoveMetrics(Outcome outcome) { + private void updateRemoveMetrics(Outcome outcome, double latency) { + incrementMetricNumOperations(); incrementMetricNumRemoves(); sampleLatency(latency); switch (outcome) { - case SUCCESS: - case NOT_FOUND: metric.add(MetricNames.SUCCEEDED, 1, null); break; - case CONDITION_FAILED: metric.add(MetricNames.CONDITION_NOT_MET, 1, null); break; - default: metric.add(MetricNames.FAILED, 1, null); break; + case SUCCESS -> incrementMetricSucceeded(); + case NOT_FOUND -> { incrementMetricSucceeded(); incrementMetricNotFound(); } + case CONDITION_FAILED -> { incrementMetricSucceeded(); incrementMetricConditionNotMet(); } + default -> incrementMetricFailed(); } } + private void sampleLatency(double latency) { setMetric(MetricNames.LATENCY, latency); } + private void incrementMetricNumOperations() { incrementMetric(MetricNames.NUM_OPERATIONS); } + private void incrementMetricNumPuts() { incrementMetric(MetricNames.NUM_PUTS); } + private void incrementMetricNumRemoves() { incrementMetric(MetricNames.NUM_REMOVES); } + private void incrementMetricNumUpdates() { incrementMetric(MetricNames.NUM_UPDATES); } + private void incrementMetricFailed() { incrementMetric(MetricNames.FAILED); } + private void incrementMetricConditionNotMet() { incrementMetric(MetricNames.CONDITION_NOT_MET); } + private void incrementMetricSucceeded() { incrementMetric(MetricNames.SUCCEEDED); } + private void incrementMetricNotFound() { incrementMetric(MetricNames.NOT_FOUND); } + private void incrementMetric(String n) { metric.add(n, 1, null); } + private void setMetric(String n, Number v) { metric.set(n, v, null); } + // ------------------------------------------------- Visits ------------------------------------------------ private VisitorParameters parseGetParameters(HttpRequest request, DocumentPath path, boolean streamed) { @@ -1438,8 +1459,13 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { private final ResponseHandler delegate; private final com.yahoo.documentapi.metrics.DocumentOperationType type; private final Instant start; + private final HttpRequest request; - private MeasuringResponseHandler(ResponseHandler delegate, com.yahoo.documentapi.metrics.DocumentOperationType type, Instant start) { + private MeasuringResponseHandler(HttpRequest request, + ResponseHandler delegate, + com.yahoo.documentapi.metrics.DocumentOperationType type, + Instant start) { + this.request = request; this.delegate = delegate; this.type = type; this.start = start; @@ -1447,17 +1473,27 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { @Override public ContentChannel handleResponse(Response response) { - var statusCodeGroup = response.getStatus() / 100; - // Status code 412 - condition not met - is considered OK - if (statusCodeGroup == 2 || response.getStatus() == 412) - metrics.reportSuccessful(type, start); - else if (statusCodeGroup == 4) - metrics.reportFailure(type, DocumentOperationStatus.REQUEST_ERROR); - else if (statusCodeGroup == 5) - metrics.reportFailure(type, DocumentOperationStatus.SERVER_ERROR); + switch (response.getStatus()) { + case 200 -> report(DocumentOperationStatus.OK); + case 400 -> report(DocumentOperationStatus.REQUEST_ERROR); + case 404 -> report(DocumentOperationStatus.NOT_FOUND); + case 412 -> report(DocumentOperationStatus.CONDITION_FAILED); + case 429 -> report(DocumentOperationStatus.TOO_MANY_REQUESTS); + case 500,502,503,504,507 -> report(DocumentOperationStatus.SERVER_ERROR); + default -> throw new IllegalStateException("Unexpected status code '%s'".formatted(response.getStatus())); + } + metrics.reportHttpRequest(clientVersion()); return delegate.handleResponse(response); } + private void report(DocumentOperationStatus... status) { metrics.report(type, start, status); } + + private String clientVersion() { + return Optional.ofNullable(request.headers().get(Headers.CLIENT_VERSION)) + .filter(l -> !l.isEmpty()).map(l -> l.get(0)) + .orElse("unknown"); + } + } static class StorageCluster { diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentApiMetrics.java b/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentApiMetrics.java index 890f06dd094..f1335351ac6 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentApiMetrics.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentApiMetrics.java @@ -10,6 +10,7 @@ import com.yahoo.metrics.simple.Point; import java.time.Duration; import java.time.Instant; +import java.util.EnumSet; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ExecutionException; @@ -52,16 +53,22 @@ public class DocumentApiMetrics { feeds.add(point); } - public void reportSuccessful(DocumentOperationType documentOperationType, Instant startTime) { - double latency = Duration.between(startTime, Instant.now()).toMillis() / 1000.0d; - reportSuccessful(documentOperationType, latency); - } - public void reportFailure(DocumentOperationType documentOperationType, DocumentOperationStatus documentOperationStatus) { Point point = points.get(documentOperationStatus).get(documentOperationType); feeds.add(point); } + public void report(DocumentOperationType type, Instant start, DocumentOperationStatus... status) { + double latency = latency(start); + for (var s : status) { + var point = points.get(s).get(type); + feedLatency.sample(latency, point); + feeds.add(point); + } + } + + private static double latency(Instant start) { return Duration.between(start, Instant.now()).toMillis() / 1000d; } + public void reportHttpRequest(String clientVersion) { if (clientVersion != null) { try { diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationStatus.java b/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationStatus.java index 97e5e9e63d8..f22eedeb5bc 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationStatus.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationStatus.java @@ -17,7 +17,7 @@ import java.util.Set; */ public enum DocumentOperationStatus { - OK, REQUEST_ERROR, SERVER_ERROR; + OK, REQUEST_ERROR, SERVER_ERROR, CONDITION_FAILED, NOT_FOUND, TOO_MANY_REQUESTS; public static DocumentOperationStatus fromMessageBusErrorCodes(Set<Integer> errorCodes) { if (errorCodes.size() == 1 && errorCodes.contains(DocumentProtocol.ERROR_NO_SPACE)) diff --git a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java index 973d0a98b24..ea143b7e480 100644 --- a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java +++ b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java @@ -845,7 +845,7 @@ public class DocumentV1ApiTest { handler.get().handleResponse(new Response(0)); // response may eventually arrive, but too late. } - assertEquals(3, metric.metrics().get("httpapi_succeeded").get(Map.of()), 0); + assertEquals(5, metric.metrics().get("httpapi_succeeded").get(Map.of()), 0); assertEquals(1, metric.metrics().get("httpapi_condition_not_met").get(Map.of()), 0); assertEquals(1, metric.metrics().get("httpapi_not_found").get(Map.of()), 0); assertEquals(1, metric.metrics().get("httpapi_failed").get(Map.of()), 0); diff --git a/zookeeper-server/zookeeper-server/pom.xml b/zookeeper-server/zookeeper-server/pom.xml index f6c8952849c..d8dbe35ec9c 100644 --- a/zookeeper-server/zookeeper-server/pom.xml +++ b/zookeeper-server/zookeeper-server/pom.xml @@ -74,11 +74,6 @@ <artifactId>snappy-java</artifactId> <scope>compile</scope> </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <scope>test</scope> - </dependency> </dependencies> <build> <plugins> @@ -93,17 +88,6 @@ </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <redirectTestOutputToFile>${test.hide}</redirectTestOutputToFile> - <forkMode>once</forkMode> - <systemPropertyVariables> - <zk-version>${zookeeper.version}</zk-version> - </systemPropertyVariables> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-install-plugin</artifactId> <configuration> <updateReleaseInfo>true</updateReleaseInfo> diff --git a/zookeeper-server/zookeeper-server/src/test/java/com/yahoo/vespa/zookeper/VespaZooKeeperTest.java b/zookeeper-server/zookeeper-server/src/test/java/com/yahoo/vespa/zookeper/VespaZooKeeperTest.java deleted file mode 100644 index 3a125d72a89..00000000000 --- a/zookeeper-server/zookeeper-server/src/test/java/com/yahoo/vespa/zookeper/VespaZooKeeperTest.java +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.zookeper; - -import com.yahoo.cloud.config.ZookeeperServerConfig; -import com.yahoo.net.HostName; -import com.yahoo.vespa.zookeeper.ReconfigurableVespaZooKeeperServer; -import com.yahoo.vespa.zookeeper.Reconfigurer; -import com.yahoo.vespa.zookeeper.VespaZooKeeperAdminImpl; -import com.yahoo.vespa.zookeeper.client.ZkClientConfigBuilder; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooDefs; -import org.apache.zookeeper.admin.ZooKeeperAdmin; -import org.apache.zookeeper.data.ACL; -import org.apache.zookeeper.data.Stat; -import org.junit.Ignore; -import org.junit.Test; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.net.ServerSocket; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.Phaser; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.IntStream; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.stream.Collectors.toList; -import static org.junit.Assert.assertEquals; - -/** - * @author jonmv - */ -public class VespaZooKeeperTest { - - static final Path tempDirRoot = getTmpDir(); - static final List<Integer> ports = new ArrayList<>(); - - /** - * Performs dynamic reconfiguration of ZooKeeper servers. - * <p> - * First, a cluster of 3 servers is set up, and some data is written to it. - * Then, 3 new servers are added, and the first 3 marked for retirement; - * this should force the quorum to move the 3 new servers, but not disconnect the old ones. - * Next, the old servers are removed. - * Then, the cluster is reduced to size 1. - * Finally, the cluster grows to size 3 again. - * <p> - * Throughout all of this, quorum should remain, and the data should remain the same. - */ - @Test(timeout = 120_000) - public void testReconfiguration() throws ExecutionException, InterruptedException, IOException, KeeperException, TimeoutException { - List<ZooKeeper> keepers = new ArrayList<>(); - for (int i = 0; i < 8; i++) keepers.add(new ZooKeeper()); - for (int i = 0; i < 8; i++) keepers.get(i).run(); - - // Start the first three servers. - List<ZookeeperServerConfig> configs = getConfigs(0, 0, 3, 0); - for (int i = 0; i < 3; i++) keepers.get(i).config = configs.get(i); - for (int i = 0; i < 3; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - - // Wait for all servers to be up and running. - for (int i = 0; i < 3; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - - // Write data to verify later. - String path = writeData(configs.get(0)); - - // Let three new servers join, causing the three older ones to retire and leave the ensemble. - configs = getConfigs(0, 3, 3, 3); - for (int i = 0; i < 6; i++) keepers.get(i).config = configs.get(i); - // The existing servers can't reconfigure and leave before the joiners are up. - for (int i = 0; i < 6; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - - // Wait for new quorum to be established. - for (int i = 0; i < 6; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - - // Verify written data is preserved. - verifyData(path, configs.get(3)); - - // Old servers are removed. - configs = getConfigs(3, 0, 3, 0); - for (int i = 0; i < 6; i++) keepers.get(i).config = configs.get(i); - // Old servers shut down, while the newer servers remain. - for (int i = 0; i < 6; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - // Ensure old servers shut down properly. - for (int i = 0; i < 3; i++) keepers.get(i).await(); - // Ensure new servers have reconfigured. - for (int i = 3; i < 6; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - - // Verify written data is preserved. - verifyData(path, configs.get(3)); - - - // Cluster shrinks to a single server. - configs = getConfigs(5, 0, 1, 0); - for (int i = 3; i < 6; i++) keepers.get(i).config = configs.get(i); - for (int i = 5; i < 6; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - for (int i = 5; i < 6; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - // We let the remaining server reconfigure the others out before they die. - for (int i = 3; i < 5; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - for (int i = 3; i < 5; i++) keepers.get(i).await(); - verifyData(path, configs.get(5)); - - // Cluster grows to 3 servers again. - configs = getConfigs(5, 0, 3, 2); - for (int i = 5; i < 8; i++) keepers.get(i).config = configs.get(i); - for (int i = 5; i < 8; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - // Wait for the joiners. - for (int i = 5; i < 8; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - verifyData(path, configs.get(7)); - - // Let the remaining servers terminate. - for (int i = 5; i < 8; i++) keepers.get(i).config = null; - for (int i = 5; i < 8; i++) keepers.get(i).phaser.arriveAndAwaitAdvance(); - for (int i = 5; i < 8; i++) keepers.get(i).await(); - } - - static String writeData(ZookeeperServerConfig config) throws IOException, InterruptedException, KeeperException { - try (ZooKeeperAdmin admin = createAdmin(config)) { - List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE; - String node = admin.create("/test-node", "hi".getBytes(UTF_8), acl, CreateMode.PERSISTENT_SEQUENTIAL); - String read = new String(admin.getData(node, false, new Stat()), UTF_8); - assertEquals("hi", read); - return node; - } - } - - static void verifyData(String path, ZookeeperServerConfig config) throws IOException, InterruptedException, KeeperException { - for (int i = 0; i < 10; i++) { - try (ZooKeeperAdmin admin = createAdmin(config)) { - assertEquals("hi", new String(admin.getData(path, false, new Stat()), UTF_8)); - return; - } - catch (KeeperException.ConnectionLossException e) { - e.printStackTrace(); - Thread.sleep(10 << i); - } - } - } - - static ZooKeeperAdmin createAdmin(ZookeeperServerConfig config) throws IOException { - return new ZooKeeperAdmin(HostName.getLocalhost() + ":" + config.clientPort(), - 10_000, - System.err::println, - new ZkClientConfigBuilder().toConfig()); - } - - static class ZooKeeper { - - final ExecutorService executor = Executors.newSingleThreadExecutor(); - final Phaser phaser = new Phaser(2); - final AtomicReference<Future<?>> future = new AtomicReference<>(); - ZookeeperServerConfig config; - - void run() { - future.set(executor.submit(() -> { - Reconfigurer reconfigurer = new Reconfigurer(new VespaZooKeeperAdminImpl()); - phaser.arriveAndAwaitAdvance(); - while (config != null) { - new ReconfigurableVespaZooKeeperServer(reconfigurer, config); - phaser.arriveAndAwaitAdvance(); // server is now up, let test thread sync here - phaser.arriveAndAwaitAdvance(); // wait before reconfig/teardown to let test thread do stuff - } - reconfigurer.deconstruct(); - })); - } - - void await() throws ExecutionException, InterruptedException, TimeoutException { - future.get().get(30, SECONDS); - } - } - - static List<ZookeeperServerConfig> getConfigs(int removed, int retired, int active, int joining) { - return IntStream.rangeClosed(1, removed + retired + active) - .mapToObj(id -> getConfig(removed, retired, active, joining, id)) - .collect(toList()); - } - - // Config for server #id among retired + active servers, of which the last may be joining, and with offset removed. - static ZookeeperServerConfig getConfig(int removed, int retired, int active, int joining, int id) { - if (id <= removed) - return null; - - Path tempDir = tempDirRoot.resolve("zookeeper-" + id); - return new ZookeeperServerConfig.Builder() - .clientPort(getPorts(id).get(0)) - .dataDir(tempDir.toString()) - .zooKeeperConfigFile(tempDir.resolve("zookeeper.cfg").toString()) - .myid(id) - .myidFile(tempDir.resolve("myid").toString()) - .dynamicReconfiguration(true) - .server(IntStream.rangeClosed(removed + 1, removed + retired + active) - .mapToObj(i -> new ZookeeperServerConfig.Server.Builder() - .id(i) - .clientPort(getPorts(i).get(0)) - .electionPort(getPorts(i).get(1)) - .quorumPort(getPorts(i).get(2)) - .hostname("localhost") - .joining(i - removed > retired + active - joining) - .retired(i - removed <= retired)) - .collect(toList())) - .build(); - } - - static List<Integer> getPorts(int id) { - if (ports.size() < id * 3) { - int previousPort; - if (ports.isEmpty()) { - String[] version = System.getProperty("zk-version").split("\\."); - int versionPortOffset = 0; - for (String part : version) - versionPortOffset = 32 * (versionPortOffset + Integer.parseInt(part)); - previousPort = 20000 + versionPortOffset % 30000; - } - else - previousPort = ports.get(ports.size() - 1); - - for (int i = 0; i < 3; i++) - ports.add(previousPort = nextPort(previousPort)); - } - return ports.subList(id * 3 - 3, id * 3); - } - - static int nextPort(int previousPort) { - for (int j = 1; j <= 30000; j++) { - int port = (previousPort + j); - while (port > 50000) - port -= 30000; - - try (ServerSocket socket = new ServerSocket(port)) { - return socket.getLocalPort(); - } - catch (IOException e) { - System.err.println("Could not bind port " + port + ": " + e); - } - } - throw new RuntimeException("No free ports"); - } - - static Path getTmpDir() { - try { - Path tempDir = Files.createTempDirectory(Paths.get(System.getProperty("java.io.tmpdir")), "vespa-zk-test"); - tempDir.toFile().deleteOnExit(); - return tempDir.toAbsolutePath(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - -} |