diff options
58 files changed, 748 insertions, 1369 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java index f7398d0478f..b4a0c3bd578 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java @@ -113,6 +113,14 @@ public class ServiceCluster { } public String nodeDescription(boolean plural) { + return entityDescription("node", plural); + } + + public String serviceDescription(boolean plural) { + return entityDescription("service", plural); + } + + private String entityDescription(String entity, boolean plural) { String pluralSuffix = plural ? "s" : ""; return isConfigServer() ? "config server" + pluralSuffix : isConfigServerHost() ? "config server host" + pluralSuffix : @@ -121,7 +129,7 @@ public class ServiceCluster { isProxy() ? (plural ? "proxies" : "proxy") : isProxyHost() ? "proxy host" + pluralSuffix : isTenantHost() ? "tenant host" + pluralSuffix : - "node" + pluralSuffix + " of {" + serviceType + "," + clusterId + "}"; + entity + pluralSuffix + " of {" + serviceType + "," + clusterId + "}"; } private boolean isHostedVespaApplicationWithId(ApplicationInstanceId id) { diff --git a/client/go/Makefile b/client/go/Makefile index 17748d765c8..a86feb456ef 100644 --- a/client/go/Makefile +++ b/client/go/Makefile @@ -2,7 +2,7 @@ # The version to release. Defaults to the current tag or revision. # Use env VERSION=X.Y.Z make ... to override -VERSION ?= $(shell git describe --tags 2> /dev/null | sed -E "s/^vespa-|-1$$//g") +VERSION ?= $(shell git describe --tags 2> /dev/null | sed "s/^v//") DEVEL_VERSION := $(shell echo "0.0.0-`git rev-parse --short HEAD`") ifeq ($(VERSION),) VERSION = $(DEVEL_VERSION) @@ -26,10 +26,11 @@ all: test checkfmt install # # Example: # -# $ git checkout vespa-X.Y.Z-1 -# $ make dist-github +# $ git checkout vX.Y.Z +# $ make dist-homebrew dist-homebrew: dist-version - brew bump-formula-pr --tag vespa-$(VERSION)-1 --version $(VERSION) vespa-cli +# TODO(mpolden): Remove --version=0 after next release + brew bump-formula-pr --tag v$(VERSION) --version=0 vespa-cli # Create a GitHub release draft for all platforms. Note that this only creates a # draft, which is not publicly visible until it's explicitly published. @@ -40,7 +41,7 @@ dist-homebrew: dist-version # # Example: # -# $ git checkout vespa-X.Y.Z-1 +# $ git checkout vX.Y.Z # $ make dist-github dist-github: dist gh release create v$(VERSION) --repo vespa-engine/vespa --notes-file $(CURDIR)/README.md --draft --title "Vespa CLI $(VERSION)" \ @@ -83,7 +84,7 @@ dist-sha256sums: dist-version: ifeq ($(VERSION),$(DEVEL_VERSION)) - $(error Invalid release version: $(VERSION). Try 'git checkout vespa-X.Y.Z-1' or 'env VERSION=X.Y.Z make ...') + $(error Invalid release version: $(VERSION). Try 'git checkout vX.Y.Z' or 'env VERSION=X.Y.Z make ...') endif # diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/JobType.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/JobType.java index 9bcb80f24ee..bbcfb4e35a5 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/JobType.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/JobType.java @@ -139,6 +139,12 @@ public enum JobType { testCdUsCentral2 ("test-cd-us-central-2", Map.of(cd , ZoneId.from("prod" , "cd-us-central-2")), true), + productionCdUsEast1 ("production-cd-us-east-1", + Map.of(cd , ZoneId.from("prod" , "cd-us-east-1"))), + + testCdUsEast1 ("test-cd-us-east-1", + Map.of(cd , ZoneId.from("prod" , "cd-us-east-1")), true), + productionCdUsWest1 ("production-cd-us-west-1", Map.of(cd , ZoneId.from("prod" , "cd-us-west-1"))), diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java index b4c116d1903..117503b95dc 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java @@ -32,7 +32,7 @@ import java.util.logging.Logger; */ public abstract class HttpMetricFetcher { - private final static Logger log = Logger.getLogger(HttpMetricFetcher.class.getPackage().getName()); + private final static Logger log = Logger.getLogger(HttpMetricFetcher.class.getName()); public final static String STATE_PATH = "/state/v1/"; // The call to apache will do 3 retries. As long as we check the services in series, we can't have this too high. public static int CONNECTION_TIMEOUT = 5000; diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java index 59db14670aa..03e72ec36de 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java @@ -5,7 +5,6 @@ import ai.vespa.metricsproxy.metric.HealthMetric; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.concurrent.ExecutionException; import java.util.logging.Level; @@ -36,19 +35,19 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher { return createHealthMetrics(stream, fetchCount); } catch (IOException | InterruptedException | ExecutionException e) { logMessageNoResponse(errMsgNoResponse(e), fetchCount); - byte [] empty = {'{','}'}; - return createHealthMetrics(new ByteArrayInputStream(empty), fetchCount); + return HealthMetric.getUnknown("Failed fetching metrics for service: " + service.getMonitoringName()); } } /** * Connect to remote service over http and fetch metrics */ - private HealthMetric createHealthMetrics(InputStream data, int fetchCount) { + private HealthMetric createHealthMetrics(InputStream data, int fetchCount) throws IOException { try { return parse(data); } catch (Exception e) { handleException(e, data, fetchCount); + while (data.read() != -1) {} return HealthMetric.getDown("Failed fetching status page for service"); } } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java index 3ee1e05c263..aad2f816959 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java @@ -35,11 +35,12 @@ public class RemoteMetricsFetcher extends HttpMetricFetcher { handleException(e, data, fetchCount); } } - private void createMetrics(InputStream data, MetricsParser.Consumer consumer, int fetchCount) { + private void createMetrics(InputStream data, MetricsParser.Consumer consumer, int fetchCount) throws IOException { try { MetricsParser.parse(data, consumer); } catch (Exception e) { handleException(e, data, fetchCount); + while (data.read() != -1) {} } } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java index 2379659f74b..32336743bdc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeAttributes.java @@ -6,9 +6,11 @@ import com.yahoo.component.Version; import com.yahoo.config.provision.DockerImage; import java.time.Instant; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.TreeMap; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -29,6 +31,7 @@ public class NodeAttributes { private Optional<Version> vespaVersion = Optional.empty(); private Optional<Version> currentOsVersion = Optional.empty(); private Optional<Instant> currentFirmwareCheck = Optional.empty(); + private List<TrustStoreItem> trustStore = List.of(); /** The list of reports to patch. A null value is used to remove the report. */ private Map<String, JsonNode> reports = new TreeMap<>(); @@ -144,7 +147,17 @@ public class NodeAttributes { && Objects.equals(vespaVersion, other.vespaVersion) && Objects.equals(currentOsVersion, other.currentOsVersion) && Objects.equals(currentFirmwareCheck, other.currentFirmwareCheck) - && Objects.equals(reports, other.reports); + && Objects.equals(reports, other.reports) + && Objects.equals(trustStore, other.trustStore); + } + + public NodeAttributes withTrustStore(List<TrustStoreItem> trustStore) { + this.trustStore = List.copyOf(trustStore); + return this; + } + + public List<TrustStoreItem> getTrustStore() { + return trustStore; } @Override @@ -156,7 +169,8 @@ public class NodeAttributes { vespaVersion.map(ver -> "vespaVersion=" + ver.toFullString()), currentOsVersion.map(ver -> "currentOsVersion=" + ver.toFullString()), currentFirmwareCheck.map(at -> "currentFirmwareCheck=" + at), - Optional.ofNullable(reports.isEmpty() ? null : "reports=" + reports)) + Optional.ofNullable(reports.isEmpty() ? null : "reports=" + reports), + Optional.ofNullable(trustStore.isEmpty() ? null : "trustStore=" + trustStore)) .filter(Optional::isPresent) .map(Optional::get) .collect(Collectors.joining(", ", "{", "}")); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java index e85d51ef992..4000a0ac182 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeSpec.java @@ -68,6 +68,8 @@ public class NodeSpec { private final Optional<ApplicationId> exclusiveTo; + private final List<TrustStoreItem> trustStore; + public NodeSpec( String hostname, Optional<String> id, @@ -98,7 +100,8 @@ public class NodeSpec { List<Event> events, Optional<String> parentHostname, Optional<URI> archiveUri, - Optional<ApplicationId> exclusiveTo) { + Optional<ApplicationId> exclusiveTo, + List<TrustStoreItem> trustStore) { if (state == NodeState.active) { requireOptional(owner, "owner"); requireOptional(membership, "membership"); @@ -138,6 +141,7 @@ public class NodeSpec { this.parentHostname = Objects.requireNonNull(parentHostname); this.archiveUri = Objects.requireNonNull(archiveUri); this.exclusiveTo = Objects.requireNonNull(exclusiveTo); + this.trustStore = Objects.requireNonNull(trustStore); } public String hostname() { @@ -283,6 +287,10 @@ public class NodeSpec { return exclusiveTo; } + public List<TrustStoreItem> trustStore() { + return trustStore; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -319,7 +327,8 @@ public class NodeSpec { Objects.equals(events, that.events) && Objects.equals(parentHostname, that.parentHostname) && Objects.equals(archiveUri, that.archiveUri) && - Objects.equals(exclusiveTo, that.exclusiveTo); + Objects.equals(exclusiveTo, that.exclusiveTo) && + Objects.equals(trustStore, that.trustStore); } @Override @@ -354,7 +363,8 @@ public class NodeSpec { events, parentHostname, archiveUri, - exclusiveTo); + exclusiveTo, + trustStore); } @Override @@ -390,6 +400,7 @@ public class NodeSpec { + " parentHostname=" + parentHostname + " archiveUri=" + archiveUri + " exclusiveTo=" + exclusiveTo + + " trustStore=" + trustStore + " }"; } @@ -424,6 +435,7 @@ public class NodeSpec { private Optional<String> parentHostname = Optional.empty(); private Optional<URI> archiveUri = Optional.empty(); private Optional<ApplicationId> exclusiveTo = Optional.empty(); + private List<TrustStoreItem> trustStore = List.of(); public Builder() {} @@ -456,6 +468,7 @@ public class NodeSpec { node.parentHostname.ifPresent(this::parentHostname); node.archiveUri.ifPresent(this::archiveUri); node.exclusiveTo.ifPresent(this::exclusiveTo); + trustStore(node.trustStore); } public Builder hostname(String hostname) { @@ -633,12 +646,19 @@ public class NodeSpec { return this; } + public Builder trustStore(List<TrustStoreItem> trustStore) { + this.trustStore = List.copyOf(trustStore); + return this; + } + public Builder updateFromNodeAttributes(NodeAttributes attributes) { attributes.getHostId().ifPresent(this::id); attributes.getDockerImage().ifPresent(this::currentDockerImage); attributes.getCurrentOsVersion().ifPresent(this::currentOsVersion); attributes.getRebootGeneration().ifPresent(this::currentRebootGeneration); attributes.getRestartGeneration().ifPresent(this::currentRestartGeneration); + // Always replace entire trust store + trustStore(attributes.getTrustStore()); this.reports.updateFromRawMap(attributes.getReports()); return this; @@ -752,7 +772,7 @@ public class NodeSpec { wantedRebootGeneration, currentRebootGeneration, wantedFirmwareCheck, currentFirmwareCheck, modelName, resources, realResources, ipAddresses, additionalIpAddresses, - reports, events, parentHostname, archiveUri, exclusiveTo); + reports, events, parentHostname, archiveUri, exclusiveTo, trustStore); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java index abc779d8a9a..d5e3acf0656 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/RealNodeRepository.java @@ -155,6 +155,11 @@ public class RealNodeRepository implements NodeRepository { .map(event -> new Event(event.agent, event.event, Optional.ofNullable(event.at).map(Instant::ofEpochMilli).orElse(Instant.EPOCH))) .collect(Collectors.toUnmodifiableList()); + List<TrustStoreItem> trustStore = Optional.ofNullable(node.trustStore).orElse(List.of()).stream() + .map(item -> new TrustStoreItem(item.fingerprint, Instant.ofEpochMilli(item.expiry))) + .collect(Collectors.toList()); + + return new NodeSpec( node.hostname, Optional.ofNullable(node.openStackId), @@ -185,7 +190,8 @@ public class RealNodeRepository implements NodeRepository { events, Optional.ofNullable(node.parentHostname), Optional.ofNullable(node.archiveUri).map(URI::create), - Optional.ofNullable(node.exclusiveTo).map(ApplicationId::fromSerializedForm)); + Optional.ofNullable(node.exclusiveTo).map(ApplicationId::fromSerializedForm), + trustStore); } private static NodeResources nodeResources(NodeRepositoryNode.NodeResources nodeResources) { @@ -270,7 +276,9 @@ public class RealNodeRepository implements NodeRepository { node.vespaVersion = nodeAttributes.getVespaVersion().map(Version::toFullString).orElse(null); node.currentOsVersion = nodeAttributes.getCurrentOsVersion().map(Version::toFullString).orElse(null); node.currentFirmwareCheck = nodeAttributes.getCurrentFirmwareCheck().map(Instant::toEpochMilli).orElse(null); - + node.trustStore = nodeAttributes.getTrustStore().stream() + .map(item -> new NodeRepositoryNode.TrustStoreItem(item.fingerprint(), item.expiry().toEpochMilli())) + .collect(Collectors.toList()); Map<String, JsonNode> reports = nodeAttributes.getReports(); node.reports = reports == null || reports.isEmpty() ? null : new TreeMap<>(reports); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java new file mode 100644 index 00000000000..d3e797bca24 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/TrustStoreItem.java @@ -0,0 +1,48 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.node.admin.configserver.noderepository; + +import java.time.Instant; +import java.util.Objects; + +/** + * @author mortent + */ +public class TrustStoreItem { + private final String fingerprint; + private final Instant expiry; + + public TrustStoreItem(String fingerprint, Instant expiry) { + this.fingerprint = fingerprint; + this.expiry = expiry; + } + + public String fingerprint() { + return fingerprint; + } + + public Instant expiry() { + return expiry; + } + + @Override + public String toString() { + return "TrustStoreItem{" + + "fingerprint='" + fingerprint + '\'' + + ", expiry=" + expiry + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TrustStoreItem that = (TrustStoreItem) o; + return Objects.equals(fingerprint, that.fingerprint) && Objects.equals(expiry, that.expiry); + } + + @Override + public int hashCode() { + return Objects.hash(fingerprint, expiry); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java index 4282c67b4cd..f471f9a9965 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/bindings/NodeRepositoryNode.java @@ -85,6 +85,9 @@ public class NodeRepositoryNode { public String exclusiveTo; @JsonProperty("history") public List<Event> history; + @JsonProperty("trustStore") + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public List<TrustStoreItem> trustStore; @JsonProperty("reports") public Map<String, JsonNode> reports = null; @@ -221,4 +224,17 @@ public class NodeRepositoryNode { '}'; } } + @JsonIgnoreProperties(ignoreUnknown = true) + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class TrustStoreItem { + @JsonProperty ("fingerprint") + public String fingerprint; + @JsonProperty ("expiry") + public long expiry; + + public TrustStoreItem(@JsonProperty("fingerprint") String fingerprint, @JsonProperty("expiry") long expiry) { + this.fingerprint = fingerprint; + this.expiry = expiry; + } + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 240e041a504..df3ac00ce7f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -18,13 +18,16 @@ import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.NodeAcl; import com.yahoo.vespa.hosted.provision.node.Reports; import com.yahoo.vespa.hosted.provision.node.Status; +import com.yahoo.vespa.hosted.provision.node.TrustStoreItem; import java.time.Instant; import java.util.Arrays; import java.util.EnumSet; +import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * A node in the node repository. The identity of a node is given by its id. @@ -50,6 +53,7 @@ public final class Node implements Nodelike { private final Optional<ApplicationId> exclusiveToApplicationId; private final Optional<ClusterSpec.Type> exclusiveToClusterType; private final Optional<String> switchHostname; + private final List<TrustStoreItem> trustStoreItems; /** Record of the last event of each type happening to this node */ private final History history; @@ -79,7 +83,7 @@ public final class Node implements Nodelike { Flavor flavor, Status status, State state, Optional<Allocation> allocation, History history, NodeType type, Reports reports, Optional<String> modelName, Optional<TenantName> reservedTo, Optional<ApplicationId> exclusiveToApplicationId, Optional<ClusterSpec.Type> exclusiveToClusterType, - Optional<String> switchHostname) { + Optional<String> switchHostname, List<TrustStoreItem> trustStoreItems) { this.id = Objects.requireNonNull(id, "A node must have an ID"); this.hostname = requireNonEmptyString(hostname, "A node must have a hostname"); this.ipConfig = Objects.requireNonNull(ipConfig, "A node must a have an IP config"); @@ -96,6 +100,7 @@ public final class Node implements Nodelike { this.exclusiveToApplicationId = Objects.requireNonNull(exclusiveToApplicationId, "exclusiveToApplicationId cannot be null"); this.exclusiveToClusterType = Objects.requireNonNull(exclusiveToClusterType, "exclusiveToClusterType cannot be null"); this.switchHostname = requireNonEmptyString(switchHostname, "switchHostname cannot be null"); + this.trustStoreItems = trustStoreItems.stream().distinct().collect(Collectors.toUnmodifiableList()); if (state == State.active) requireNonEmpty(ipConfig.primary(), "Active node " + hostname + " must have at least one valid IP address"); @@ -207,6 +212,11 @@ public final class Node implements Nodelike { return switchHostname; } + /** Returns the trusted certificates for this host if any. */ + public List<TrustStoreItem> trustedCertificates() { + return trustStoreItems; + } + /** * Returns a copy of this where wantToFail is set to true and history is updated to reflect this. */ @@ -295,13 +305,13 @@ public final class Node implements Nodelike { /** Returns a node with the status assigned to the given value */ public Node with(Status status) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, - reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a node with the type assigned to the given value */ public Node with(NodeType type) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history, type, - reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a node with the flavor assigned to the given value */ @@ -309,31 +319,31 @@ public final class Node implements Nodelike { if (flavor.equals(this.flavor)) return this; History updateHistory = history.with(new History.Event(History.Event.Type.resized, agent, instant)); return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, allocation, updateHistory, type, - reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this with the reboot generation set to generation */ public Node withReboot(Generation generation) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status.withReboot(generation), state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this with the openStackId set */ public Node withOpenStackId(String openStackId) { return new Node(openStackId, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this with model name set to given value */ public Node withModelName(String modelName) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, Optional.of(modelName), reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, Optional.of(modelName), reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this with model name cleared */ public Node withoutModelName() { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, Optional.empty(), reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, Optional.empty(), reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this with a history record saying it was detected to be down at this instant */ @@ -364,55 +374,55 @@ public final class Node implements Nodelike { */ public Node with(Allocation allocation) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - Optional.of(allocation), history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + Optional.of(allocation), history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a new Node without an allocation. */ public Node withoutAllocation() { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - Optional.empty(), history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + Optional.empty(), history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this node with IP config set to the given value. */ public Node with(IP.Config ipConfig) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this node with the parent hostname assigned to the given value. */ public Node withParentHostname(String parentHostname) { return new Node(id, ipConfig, hostname, Optional.of(parentHostname), flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } public Node withReservedTo(TenantName tenant) { if (type != NodeType.host) throw new IllegalArgumentException("Only host nodes can be reserved, " + hostname + " has type " + type); return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, Optional.of(tenant), exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, Optional.of(tenant), exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } /** Returns a copy of this node which is not reserved to a tenant */ public Node withoutReservedTo() { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, Optional.empty(), exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, Optional.empty(), exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } public Node withExclusiveToApplicationId(ApplicationId exclusiveTo) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, Optional.ofNullable(exclusiveTo), exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, Optional.ofNullable(exclusiveTo), exclusiveToClusterType, switchHostname, trustStoreItems); } public Node withExclusiveToClusterType(ClusterSpec.Type exclusiveTo) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, Optional.ofNullable(exclusiveTo), switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, Optional.ofNullable(exclusiveTo), switchHostname, trustStoreItems); } /** Returns a copy of this node with switch hostname set to given value */ public Node withSwitchHostname(String switchHostname) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, Optional.ofNullable(switchHostname)); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, Optional.ofNullable(switchHostname), trustStoreItems); } /** Returns a copy of this node with switch hostname unset */ @@ -450,12 +460,18 @@ public final class Node implements Nodelike { /** Returns a copy of this node with the given history. */ public Node with(History history) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } public Node with(Reports reports) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname); + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); + } + + public Node with(List<TrustStoreItem> trustStoreItems) { + return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, + allocation, history, type, reports, modelName, reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, + trustStoreItems); } private static Optional<String> requireNonEmptyString(Optional<String> value, String message) { @@ -594,6 +610,7 @@ public final class Node implements Nodelike { private Status status; private Reports reports; private History history; + private List<TrustStoreItem> trustStoreItems; private Builder(String id, String hostname, Flavor flavor, State state, NodeType type) { this.id = id; @@ -663,12 +680,18 @@ public final class Node implements Nodelike { return this; } + public Builder trustedCertificates(List<TrustStoreItem> trustStoreItems) { + this.trustStoreItems = trustStoreItems; + return this; + } + public Node build() { return new Node(id, Optional.ofNullable(ipConfig).orElse(IP.Config.EMPTY), hostname, Optional.ofNullable(parentHostname), - flavor, Optional.ofNullable(status).orElseGet(Status::initial), state, Optional.ofNullable(allocation), - Optional.ofNullable(history).orElseGet(History::empty), type, Optional.ofNullable(reports).orElseGet(Reports::new), - Optional.ofNullable(modelName), Optional.ofNullable(reservedTo), Optional.ofNullable(exclusiveToApplicationId), - Optional.ofNullable(exclusiveToClusterType), Optional.ofNullable(switchHostname)); + flavor, Optional.ofNullable(status).orElseGet(Status::initial), state, Optional.ofNullable(allocation), + Optional.ofNullable(history).orElseGet(History::empty), type, Optional.ofNullable(reports).orElseGet(Reports::new), + Optional.ofNullable(modelName), Optional.ofNullable(reservedTo), Optional.ofNullable(exclusiveToApplicationId), + Optional.ofNullable(exclusiveToClusterType), Optional.ofNullable(switchHostname), + Optional.ofNullable(trustStoreItems).orElseGet(List::of)); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/TrustStoreItem.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/TrustStoreItem.java new file mode 100644 index 00000000000..6fb94d0bc62 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/TrustStoreItem.java @@ -0,0 +1,68 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.hosted.provision.node; + +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Inspector; + +import java.time.Instant; +import java.util.Objects; + +/** + * Contains the fingerprint and expiry of certificates in a hosts truststore. + * + * @author mortent + */ +public class TrustStoreItem { + private static final String FINGERPRINT_FIELD = "fingerprint"; + private static final String EXPIRY_FIELD = "expiry"; + + private final String fingerprint; + private final Instant expiry; + + public TrustStoreItem(String fingerprint, Instant expiry) { + this.fingerprint = fingerprint; + this.expiry = expiry; + } + + public String fingerprint() { + return fingerprint; + } + + public Instant expiry() { + return expiry; + } + + public void toSlime(Cursor trustedCertificatesRoot) { + Cursor object = trustedCertificatesRoot.addObject(); + object.setString(FINGERPRINT_FIELD, fingerprint); + object.setLong(EXPIRY_FIELD, expiry.toEpochMilli()); + } + + public static TrustStoreItem fromSlime(Inspector inspector) { + String fingerprint = inspector.field(FINGERPRINT_FIELD).asString(); + Instant expiry = Instant.ofEpochMilli(inspector.field(EXPIRY_FIELD).asLong()); + return new TrustStoreItem(fingerprint, expiry); + } + + @Override + public String toString() { + return "TrustedCertificate{" + + "fingerprint='" + fingerprint + '\'' + + ", expiry=" + expiry + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TrustStoreItem that = (TrustStoreItem) o; + return Objects.equals(fingerprint, that.fingerprint) && Objects.equals(expiry, that.expiry); + } + + @Override + public int hashCode() { + return Objects.hash(fingerprint, expiry); + } +}
\ No newline at end of file diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index d5a4c459ef3..7e252391cb3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -210,7 +210,7 @@ public class CuratorDatabaseClient { toState.isAllocated() ? node.allocation() : Optional.empty(), node.history().recordStateTransition(node.state(), toState, agent, clock.instant()), node.type(), node.reports(), node.modelName(), node.reservedTo(), - node.exclusiveToApplicationId(), node.exclusiveToClusterType(), node.switchHostname()); + node.exclusiveToApplicationId(), node.exclusiveToClusterType(), node.switchHostname(), node.trustedCertificates()); writeNode(toState, curatorTransaction, node, newNode); writtenNodes.add(newNode); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 19bbe92eff6..868837daeeb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -36,6 +36,7 @@ import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.OsVersion; import com.yahoo.vespa.hosted.provision.node.Reports; import com.yahoo.vespa.hosted.provision.node.Status; +import com.yahoo.vespa.hosted.provision.node.TrustStoreItem; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -95,6 +96,7 @@ public class NodeSerializer { private static final String exclusiveToApplicationIdKey = "exclusiveTo"; private static final String exclusiveToClusterTypeKey = "exclusiveToClusterType"; private static final String switchHostnameKey = "switchHostname"; + private static final String trustedCertificatesKey = "trustedCertificates"; // Node resource fields private static final String flavorKey = "flavor"; @@ -122,6 +124,10 @@ public class NodeSerializer { // Network port fields private static final String networkPortsKey = "networkPorts"; + // Trusted certificates fields + private static final String fingerprintKey = "fingerprint"; + private static final String expiresKey = "expires"; + // A cache of deserialized Node objects. The cache is keyed on the hash of serialized node data. // // Deserializing a Node from slime is expensive, and happens frequently. Node instances that have already been @@ -182,6 +188,7 @@ public class NodeSerializer { node.reservedTo().ifPresent(tenant -> object.setString(reservedToKey, tenant.value())); node.exclusiveToApplicationId().ifPresent(applicationId -> object.setString(exclusiveToApplicationIdKey, applicationId.serializedForm())); node.exclusiveToClusterType().ifPresent(clusterType -> object.setString(exclusiveToClusterTypeKey, clusterType.name())); + trustedCertificatesToSlime(node.trustedCertificates(), object.setArray(trustedCertificatesKey)); } private void toSlime(Flavor flavor, Cursor object) { @@ -236,6 +243,14 @@ public class NodeSerializer { }); } + private void trustedCertificatesToSlime(List<TrustStoreItem> trustStoreItems, Cursor array) { + trustStoreItems.forEach(cert -> { + Cursor object = array.addObject(); + object.setString(fingerprintKey, cert.fingerprint()); + object.setLong(expiresKey, cert.expiry().toEpochMilli()); + }); + } + // ---------------- Deserialization -------------------------------------------------- public Node fromJson(Node.State state, byte[] data) { @@ -269,7 +284,8 @@ public class NodeSerializer { reservedToFromSlime(object.field(reservedToKey)), exclusiveToApplicationIdFromSlime(object.field(exclusiveToApplicationIdKey)), exclusiveToClusterTypeFromSlime(object.field(exclusiveToClusterTypeKey)), - switchHostnameFromSlime(object.field(switchHostnameKey))); + switchHostnameFromSlime(object.field(switchHostnameKey)), + trustedCertificatesFromSlime(object)); } private Status statusFromSlime(Inspector object) { @@ -419,6 +435,13 @@ public class NodeSerializer { return Optional.of(ClusterSpec.Type.from(object.asString())); } + private List<TrustStoreItem> trustedCertificatesFromSlime(Inspector object) { + return SlimeUtils.entriesStream(object.field(trustedCertificatesKey)) + .map(elem -> new TrustStoreItem(elem.field(fingerprintKey).asString(), + Instant.ofEpochMilli(elem.field(expiresKey).asLong()))) + .collect(Collectors.toList()); + } + // ----------------- Enum <-> string mappings ---------------------------------------- /** Returns the event type, or null if this event type should be ignored */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java index 8d37c13d2bc..5dfacc2c3d2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java @@ -25,6 +25,7 @@ import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.Report; import com.yahoo.vespa.hosted.provision.node.Reports; +import com.yahoo.vespa.hosted.provision.node.TrustStoreItem; import java.io.IOException; import java.io.InputStream; @@ -186,6 +187,8 @@ public class NodePatcher implements AutoCloseable { return node.withExclusiveToClusterType(SlimeUtils.optionalString(value).map(ClusterSpec.Type::valueOf).orElse(null)); case "switchHostname": return value.type() == Type.NIX ? node.withoutSwitchHostname() : node.withSwitchHostname(value.asString()); + case "trustStore": + return nodeWithTrustStore(node, value); default : throw new IllegalArgumentException("Could not apply field '" + name + "' on a node: No such modifiable field"); } @@ -230,6 +233,14 @@ public class NodePatcher implements AutoCloseable { return patchedNode; } + private Node nodeWithTrustStore(Node node, Inspector inspector) { + List<TrustStoreItem> trustStoreItems = + SlimeUtils.entriesStream(inspector) + .map(TrustStoreItem::fromSlime) + .collect(Collectors.toList()); + return node.with(trustStoreItems); + } + private Set<String> asStringSet(Inspector field) { if ( ! field.type().equals(Type.ARRAY)) throw new IllegalArgumentException("Expected an ARRAY value, got a " + field.type()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java index 2cf671514c4..80100128379 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Address; import com.yahoo.vespa.hosted.provision.node.History; +import com.yahoo.vespa.hosted.provision.node.TrustStoreItem; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.status.HostInfo; import com.yahoo.vespa.orchestrator.status.HostStatus; @@ -182,6 +183,7 @@ class NodesResponse extends SlimeJsonResponse { node.modelName().ifPresent(modelName -> object.setString("modelName", modelName)); node.switchHostname().ifPresent(switchHostname -> object.setString("switchHostname", switchHostname)); nodeRepository.archiveUris().archiveUriFor(node).ifPresent(uri -> object.setString("archiveUri", uri)); + trustedCertsToSlime(node.trustedCertificates(), object); } private void toSlime(ApplicationId id, Cursor object) { @@ -228,6 +230,12 @@ class NodesResponse extends SlimeJsonResponse { addresses.forEach(address -> addressesArray.addString(address.hostname())); } + private void trustedCertsToSlime(List<TrustStoreItem> trustStoreItems, Cursor object) { + if (trustStoreItems.isEmpty()) return; + Cursor array = object.setArray("trustStore"); + trustStoreItems.forEach(cert -> cert.toSlime(array)); + } + private String lastElement(String path) { if (path.endsWith("/")) path = path.substring(0, path.length()-1); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java index 158a1d6e5ac..921b78252d3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java @@ -29,6 +29,7 @@ import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.Report; import com.yahoo.vespa.hosted.provision.node.Reports; +import com.yahoo.vespa.hosted.provision.node.TrustStoreItem; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import org.junit.Test; @@ -462,6 +463,16 @@ public class NodeSerializerTest { assertEquals(exclusiveToCluster, node.exclusiveToClusterType().get()); } + @Test + public void truststore_serialization() { + Node node = nodeSerializer.fromJson(State.active, nodeSerializer.toJson(createNode())); + assertEquals(List.of(), node.trustedCertificates()); + List<TrustStoreItem> trustStoreItems = List.of(new TrustStoreItem("foo", Instant.parse("2023-09-01T23:59:59Z")), new TrustStoreItem("bar", Instant.parse("2025-05-20T23:59:59Z"))); + node = node.with(trustStoreItems); + node = nodeSerializer.fromJson(State.active, nodeSerializer.toJson(node)); + assertEquals(trustStoreItems, node.trustedCertificates()); + } + private byte[] createNodeJson(String hostname, String... ipAddress) { String ipAddressJsonPart = ""; if (ipAddress.length > 0) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index 6c052a6c364..88a8a22913e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -1023,6 +1023,26 @@ public class NodesV2ApiTest { tester.assertPartialResponse(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com"), "archiveUri", false); } + @Test + public void trusted_certificates_patch() throws IOException { + String url = "http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com"; + tester.assertPartialResponse(new Request(url), "\"trustStore\":[]", false); // initially empty list + + String trustStore = "\"trustStore\":[" + + "{" + + "\"fingerprint\":\"foo\"," + + "\"expiry\":1632302251000" + + "}," + + "{" + + "\"fingerprint\":\"bar\"," + + "\"expiry\":1758532706000" + + "}" + + "]"; + assertResponse(new Request(url, Utf8.toBytes("{"+trustStore+"}"), Request.Method.PATCH), + "{\"message\":\"Updated dockerhost1.yahoo.com\"}"); + tester.assertPartialResponse(new Request(url), trustStore, true); + } + private static String asDockerNodeJson(String hostname, String parentHostname, String... ipAddress) { return asDockerNodeJson(hostname, NodeType.tenant, parentHostname, ipAddress); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java index fd115702588..47ba6dedd84 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java @@ -17,13 +17,20 @@ public interface ClusterApi { String clusterInfo(); ClusterId clusterId(); ServiceType serviceType(); + + /** Some human-readable string naming the service(s) to a human reader. */ + String serviceDescription(boolean plural); + boolean isStorageCluster(); - /** Returns the reasons no services are up in the implied group, or empty if some services are up. */ - Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp(); + boolean isConfigServerLike(); + + /** Returns the non-empty reasons for why all services are down, or otherwise empty. */ + Optional<SuspensionReasons> allServicesDown(); + boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException; - int percentageOfServicesDown(); + int percentageOfServicesDownOutsideGroup(); int percentageOfServicesDownIfGroupIsAllowedToBeDown(); Optional<StorageNode> storageNodeInGroup(); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java index 42f8a187e98..6880700e4a9 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java @@ -107,6 +107,11 @@ class ClusterApiImpl implements ClusterApi { } @Override + public String serviceDescription(boolean plural) { + return serviceCluster.serviceDescription(plural); + } + + @Override public boolean isStorageCluster() { return VespaModelUtil.isStorage(serviceCluster); } @@ -117,7 +122,12 @@ class ClusterApiImpl implements ClusterApi { } @Override - public Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp() { + public boolean isConfigServerLike() { + return serviceCluster.isConfigServerLike(); + } + + @Override + public Optional<SuspensionReasons> allServicesDown() { SuspensionReasons reasons = new SuspensionReasons(); for (ServiceInstance service : servicesInGroup) { @@ -157,9 +167,8 @@ class ClusterApiImpl implements ClusterApi { } @Override - public int percentageOfServicesDown() { - int servicesDownInGroupCount = (int) servicesInGroup.stream().filter(this::serviceEffectivelyDown).count(); - int numberOfServicesDown = servicesDownAndNotInGroup().size() + missingServices + servicesDownInGroupCount; + public int percentageOfServicesDownOutsideGroup() { + int numberOfServicesDown = servicesDownAndNotInGroup().size() + missingServices; return numberOfServicesDown * 100 / (serviceCluster.serviceInstances().size() + missingServices); } @@ -187,12 +196,11 @@ class ClusterApiImpl implements ClusterApi { description.append(" "); final int nodeLimit = 3; - description.append("Suspended hosts: "); description.append(suspended.stream().sorted().distinct().limit(nodeLimit).collect(Collectors.toList()).toString()); if (suspended.size() > nodeLimit) { - description.append(", and " + (suspended.size() - nodeLimit) + " more"); + description.append(" and " + (suspended.size() - nodeLimit) + " others"); } - description.append("."); + description.append(" are suspended."); } Set<ServiceInstance> downElsewhere = servicesDownAndNotInGroup().stream() @@ -204,7 +212,6 @@ class ClusterApiImpl implements ClusterApi { description.append(" "); final int serviceLimit = 2; // services info is verbose - description.append("Services down on resumed hosts: "); description.append(Stream.concat( downElsewhere.stream().map(ServiceInstance::toString).sorted(), missingServices > 0 ? Stream.of(descriptionOfMissingServices) : Stream.of()) @@ -213,9 +220,9 @@ class ClusterApiImpl implements ClusterApi { .toString()); if (downElsewhereTotal > serviceLimit) { - description.append(", and " + (downElsewhereTotal - serviceLimit) + " more"); + description.append(" and " + (downElsewhereTotal - serviceLimit) + " others"); } - description.append("."); + description.append(" are down."); } return description.toString(); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index 3a6c24a05e3..fe06ef7d75b 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -23,6 +23,16 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { @Override public SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { + return verifyGroupGoingDownIsFine(clusterApi, false); + } + + @Override + public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { + verifyGroupGoingDownIsFine(clusterApi, true); + } + + private SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi, boolean permanent) + throws HostStateChangeDeniedException { if (clusterApi.noServicesOutsideGroupIsDown()) { return SuspensionReasons.nothingNoteworthy(); } @@ -32,46 +42,28 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { return SuspensionReasons.nothingNoteworthy(); } - Optional<SuspensionReasons> suspensionReasons = clusterApi.reasonsForNoServicesInGroupIsUp(); - if (suspensionReasons.isPresent()) { - return suspensionReasons.get(); + // Be a bit more cautious when removing nodes permanently + if (!permanent) { + // Disallow suspending a 2nd and downed config server to avoid losing ZK quorum. + if (!clusterApi.isConfigServerLike()) { + Optional<SuspensionReasons> suspensionReasons = clusterApi.allServicesDown(); + if (suspensionReasons.isPresent()) { + return suspensionReasons.get(); + } + } } String message = percentageOfServicesAllowedToBeDown <= 0 - ? "Suspension of service with type '" + clusterApi.serviceType() + "' not allowed: " - + clusterApi.percentageOfServicesDown() + "% are suspended already." + clusterApi.downDescription() - : "Suspension of service with type '" + clusterApi.serviceType() - + "' would increase from " + clusterApi.percentageOfServicesDown() - + "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() - + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%." - + clusterApi.downDescription(); + ? clusterApi.percentageOfServicesDownOutsideGroup() + "% of the " + clusterApi.serviceDescription(true) + + " are down or suspended already:" + clusterApi.downDescription() + : "The percentage of downed or suspended " + clusterApi.serviceDescription(true) + + " would increase from " + clusterApi.percentageOfServicesDownOutsideGroup() + "% to " + + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() + "% (limit is " + + percentageOfServicesAllowedToBeDown + "%):" + clusterApi.downDescription(); throw new HostStateChangeDeniedException(clusterApi.getNodeGroup(), ENOUGH_SERVICES_UP_CONSTRAINT, message); } - @Override - public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { - // This policy is similar to verifyGroupGoingDownIsFine, except that having no services up in the group will - // not allow the suspension: We are a bit more cautious when removing nodes. - - if (clusterApi.noServicesOutsideGroupIsDown()) { - return; - } - - int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); - if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { - return; - } - - throw new HostStateChangeDeniedException( - clusterApi.getNodeGroup(), - ENOUGH_SERVICES_UP_CONSTRAINT, - "Down percentage for service type " + clusterApi.serviceType() - + " would increase to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() - + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%." - + clusterApi.downDescription()); - } - // Non-private for testing purposes ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) { // Possible service clusters on a node as of 2021-01-22: diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorTest.java index 83c902ed981..f885d5301de 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorTest.java @@ -102,7 +102,7 @@ public class OrchestratorTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), containsString("Changing the state of cfg2 would violate enough-services-up")); - assertThat(e.getMessage(), containsString("Suspended hosts: [cfg1]")); + assertThat(e.getMessage(), containsString("[cfg1] are suspended.")); } // cfg1 is removed from the application @@ -114,7 +114,7 @@ public class OrchestratorTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), containsString("Changing the state of cfg2 would violate enough-services-up")); - assertThat(e.getMessage(), containsString("Services down on resumed hosts: [1 missing config server]")); + assertThat(e.getMessage(), containsString("[1 missing config server] are down.")); } // cfg1 is reprovisioned, added to the node repo, and activated @@ -129,7 +129,7 @@ public class OrchestratorTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), containsString("Changing the state of cfg1 would violate enough-services-up")); - assertThat(e.getMessage(), containsString("Suspended hosts: [cfg2]")); + assertThat(e.getMessage(), containsString("[cfg2] are suspended")); } // etc (should be the same as for cfg1) diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index da8591c6631..3ea739b385e 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -95,11 +95,11 @@ public class ClusterApiImplTest { assertEquals("{ clusterId=cluster, serviceType=service-type }", clusterApi.clusterInfo()); assertFalse(clusterApi.isStorageCluster()); - assertEquals(" Suspended hosts: [host3, host4]. Services down on resumed hosts: [" + - "ServiceInstance{configId=service-2, hostName=host2, serviceStatus=" + - "ServiceStatusInfo{status=DOWN, since=Optional.empty, lastChecked=Optional.empty}}].", + assertEquals(" [host3, host4] are suspended. [ServiceInstance{configId=service-2, hostName=host2, " + + "serviceStatus=ServiceStatusInfo{status=DOWN, since=Optional.empty, lastChecked=Optional.empty}}] " + + "are down.", clusterApi.downDescription()); - assertEquals(60, clusterApi.percentageOfServicesDown()); + assertEquals(60, clusterApi.percentageOfServicesDownOutsideGroup()); assertEquals(80, clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()); } @@ -181,8 +181,8 @@ public class ClusterApiImplTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), - containsString("Suspension of service with type 'configserver' not allowed: 33% are suspended already. " + - "Services down on resumed hosts: [1 missing config server].")); + containsString("Changing the state of cfg1 would violate enough-services-up: 33% of the config " + + "servers are down or suspended already: [1 missing config server] are down.")); } } @@ -201,18 +201,25 @@ public class ClusterApiImplTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), - containsString("Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " + - "Services down on resumed hosts: [1 missing config server host].")); + containsString("Changing the state of cfg1 would violate enough-services-up: 33% of the config " + + "server hosts are down or suspended already: [1 missing config server host] are down.")); } } @Test - public void testCfg1SuspendsIfDownWithMissingCfg3() throws HostStateChangeDeniedException { + public void testCfg1DoesNotSuspendIfDownWithMissingCfg3() throws HostStateChangeDeniedException { ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.DOWN, ServiceStatus.UP); HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource, zone); - policy.verifyGroupGoingDownIsFine(clusterApi); + try { + policy.verifyGroupGoingDownIsFine(clusterApi); + fail(); + } catch (HostStateChangeDeniedException e) { + assertThat(e.getMessage(), + containsString("Changing the state of cfg1 would violate enough-services-up: 33% of the config " + + "servers are down or suspended already: [1 missing config server] are down.")); + } } @Test @@ -320,7 +327,7 @@ public class ClusterApiImplTest { clock); assertEquals(expectedNoServicesInGroupIsUp.map(SuspensionReasons::getMessagesInOrder), - clusterApi.reasonsForNoServicesInGroupIsUp().map(SuspensionReasons::getMessagesInOrder)); + clusterApi.allServicesDown().map(SuspensionReasons::getMessagesInOrder)); assertEquals(expectedNoServicesOutsideGroupIsDown, clusterApi.noServicesOutsideGroupIsDown()); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java index 303dabebba8..8f47051621f 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java @@ -131,13 +131,14 @@ public class HostedVespaClusterPolicyTest { int percentageOfServicesDownIfGroupIsAllowedToBeDown, boolean expectSuccess) throws HostStateChangeDeniedException { when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown); - when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); + when(clusterApi.allServicesDown()).thenReturn(noServicesInGroupIsUp); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20); doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi); when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c")); when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type")); - when(clusterApi.percentageOfServicesDown()).thenReturn(5); + when(clusterApi.serviceDescription(true)).thenReturn("services of {service-type,cluster-id}"); + when(clusterApi.percentageOfServicesDownOutsideGroup()).thenReturn(5); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown); when(clusterApi.downDescription()).thenReturn(" Down description"); @@ -152,9 +153,10 @@ public class HostedVespaClusterPolicyTest { } } catch (HostStateChangeDeniedException e) { if (!expectSuccess) { - assertEquals("Changing the state of node-group would violate enough-services-up: " + - "Suspension of service with type 'service-type' would increase from 5% to 13%, " + - "over the limit of 10%. Down description", e.getMessage()); + assertEquals("Changing the state of node-group would violate enough-services-up: The percentage of downed " + + "or suspended services of {service-type,cluster-id} would increase from 5% to 13% (limit is 10%): " + + "Down description", + e.getMessage()); assertEquals("enough-services-up", e.getConstraintName()); } } diff --git a/searchlib/abi-spec.json b/searchlib/abi-spec.json index e8e3a3cb133..2468fd0c5c7 100644 --- a/searchlib/abi-spec.json +++ b/searchlib/abi-spec.json @@ -1034,6 +1034,7 @@ "public static final int LDEXP", "public static final int POW", "public static final int BIT", + "public static final int HAMMING", "public static final int MAP", "public static final int REDUCE", "public static final int JOIN", @@ -1389,7 +1390,8 @@ "public static final enum com.yahoo.searchlib.rankingexpression.rule.Function max", "public static final enum com.yahoo.searchlib.rankingexpression.rule.Function min", "public static final enum com.yahoo.searchlib.rankingexpression.rule.Function pow", - "public static final enum com.yahoo.searchlib.rankingexpression.rule.Function bit" + "public static final enum com.yahoo.searchlib.rankingexpression.rule.Function bit", + "public static final enum com.yahoo.searchlib.rankingexpression.rule.Function hamming" ] }, "com.yahoo.searchlib.rankingexpression.rule.FunctionNode": { diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java index e41732f9d16..33ba3c6ef4b 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java @@ -157,6 +157,7 @@ public class TensorValue extends Value { case fmod: return value.fmod(argument); case ldexp: return value.ldexp(argument); case bit: return value.bit(argument); + case hamming: return value.hamming(argument); default: throw new UnsupportedOperationException("Cannot combine two tensors using " + function); } } diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java index 16aa947986d..3958711f74b 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java @@ -46,7 +46,8 @@ public enum Function implements Serializable { max(2) { public double evaluate(double x, double y) { return max(x,y); } }, min(2) { public double evaluate(double x, double y) { return min(x,y); } }, pow(2) { public double evaluate(double x, double y) { return pow(x,y); } }, - bit(2) { public double evaluate(double x, double y) { return ((int)y < 8 && (int)y >= 0 && ((int)x & (1 << (int)y)) != 0) ? 1.0 : 0.0; } }; + bit(2) { public double evaluate(double x, double y) { return ((int)y < 8 && (int)y >= 0 && ((int)x & (1 << (int)y)) != 0) ? 1.0 : 0.0; } }, + hamming(2) { public double evaluate(double x, double y) { return ScalarFunctions.Hamming.hamming(x, y); } }; private final int arity; diff --git a/searchlib/src/main/javacc/RankingExpressionParser.jj b/searchlib/src/main/javacc/RankingExpressionParser.jj index 99eff010628..7bfbfd6c005 100755 --- a/searchlib/src/main/javacc/RankingExpressionParser.jj +++ b/searchlib/src/main/javacc/RankingExpressionParser.jj @@ -124,6 +124,7 @@ TOKEN : // MIN <POW: "pow"> | <BIT: "bit"> | + <HAMMING: "hamming"> | <MAP: "map"> | <REDUCE: "reduce"> | @@ -735,7 +736,8 @@ Function binaryFunctionName() : { } <MAX> { return Function.max; } | <MIN> { return Function.min; } | <POW> { return Function.pow; } | - <BIT> { return Function.bit; } + <BIT> { return Function.bit; } | + <HAMMING> { return Function.hamming; } } List<ExpressionNode> expressionList() : diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java index 4a3c4b248be..246dbcb2b1e 100644 --- a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java @@ -288,6 +288,8 @@ public class EvaluationTestCase { tester.assertEvaluates("{ {h:0}:1.5, {h:1}:1.5 }", "0.5 + tensor0", "{ {h:0}:1.0,{h:1}:1.0 }"); tester.assertEvaluates("{ {x:0,y:0}:0, {x:1,y:0}:0 }", "atan2(tensor0, tensor1)", "{ {x:0}:0, {x:1}:0 }", "{ {y:0}:1 }"); + tester.assertEvaluates("{ {x:0,y:0}:2, {x:1,y:0}:7 }", + "hamming(tensor0, tensor1)", "{ {x:0}:97, {x:1}:-1 }", "{ {y:0}:1 }"); tester.assertEvaluates("{ {x:0,y:0}:0, {x:1,y:0}:1 }", "tensor0 > tensor1", "{ {x:0}:3, {x:1}:7 }", "{ {y:0}:5 }"); tester.assertEvaluates("{ {x:0,y:0}:1, {x:1,y:0}:0 }", diff --git a/searchlib/src/tests/rankingexpression/rankingexpressionlist b/searchlib/src/tests/rankingexpression/rankingexpressionlist index 77b2294c668..d41570732d9 100644 --- a/searchlib/src/tests/rankingexpression/rankingexpressionlist +++ b/searchlib/src/tests/rankingexpression/rankingexpressionlist @@ -87,6 +87,7 @@ floor(10) relu(10) sigmoid(10) atan2(10, 20); atan2(10,20) +hamming(42, -16); hamming(42,-16) ldexp(10, 20); ldexp(10,20) pow(10, 20); pow(10,20) fmod(10, 20); fmod(10,20) diff --git a/security-utils/src/main/java/com/yahoo/security/X509CertificateUtils.java b/security-utils/src/main/java/com/yahoo/security/X509CertificateUtils.java index 215dc311af3..dbabf1274af 100644 --- a/security-utils/src/main/java/com/yahoo/security/X509CertificateUtils.java +++ b/security-utils/src/main/java/com/yahoo/security/X509CertificateUtils.java @@ -21,10 +21,13 @@ import java.io.UncheckedIOException; import java.math.BigInteger; import java.security.GeneralSecurityException; import java.security.KeyPair; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.security.PrivateKey; import java.security.PublicKey; import java.security.Signature; import java.security.SignatureException; +import java.security.cert.CertificateEncodingException; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.Duration; @@ -178,4 +181,16 @@ public class X509CertificateUtils { .build(); return new X509CertificateWithKey(cert, keyPair.getPrivate()); } + + /** + * @return certificate SHA-1 fingerprint + */ + public static byte[] getX509CertificateFingerPrint(X509Certificate certificate) { + try { + MessageDigest sha1 = MessageDigest.getInstance("SHA-1"); + return sha1.digest(certificate.getEncoded()); + } catch (CertificateEncodingException | NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } } diff --git a/slobrok/src/apps/slobrok/slobrok.cpp b/slobrok/src/apps/slobrok/slobrok.cpp index b2748762a12..18a401e1db6 100644 --- a/slobrok/src/apps/slobrok/slobrok.cpp +++ b/slobrok/src/apps/slobrok/slobrok.cpp @@ -50,7 +50,6 @@ App::Main() { uint32_t portnum = 2773; vespalib::string cfgId; - bool useNewLogic = false; int argi = 1; const char* optArg; @@ -64,7 +63,7 @@ App::Main() portnum = atoi(optArg); break; case 'N': - useNewLogic = true; + // ignored break; default: LOG(error, "unknown option letter '%c'", c); @@ -76,11 +75,11 @@ App::Main() if (cfgId.empty()) { LOG(debug, "no config id specified"); ConfigShim shim(portnum); - mainobj = std::make_unique<SBEnv>(shim, useNewLogic); + mainobj = std::make_unique<SBEnv>(shim); } else { ConfigShim shim(portnum, cfgId); shim.enableStateServer(true); - mainobj = std::make_unique<SBEnv>(shim, useNewLogic); + mainobj = std::make_unique<SBEnv>(shim); } hook_sigterm(); res = mainobj->MainLoop(); diff --git a/slobrok/src/vespa/slobrok/server/CMakeLists.txt b/slobrok/src/vespa/slobrok/server/CMakeLists.txt index ae1a05c5181..5168758e46d 100644 --- a/slobrok/src/vespa/slobrok/server/CMakeLists.txt +++ b/slobrok/src/vespa/slobrok/server/CMakeLists.txt @@ -1,7 +1,6 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(slobrok_slobrokserver SOURCES - cmd.cpp configshim.cpp exchange_manager.cpp i_monitored_server.cpp @@ -23,8 +22,6 @@ vespa_add_library(slobrok_slobrokserver request_completion_handler.cpp reserved_name.cpp rpc_mapping_monitor.cpp - rpc_server_manager.cpp - rpc_server_map.cpp rpchooks.cpp rpcmirror.cpp sbenv.cpp diff --git a/slobrok/src/vespa/slobrok/server/cmd.cpp b/slobrok/src/vespa/slobrok/server/cmd.cpp deleted file mode 100644 index df856189d89..00000000000 --- a/slobrok/src/vespa/slobrok/server/cmd.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - - -#include "cmd.h" -#include "rpc_server_map.h" -#include "reserved_name.h" -#include "remote_slobrok.h" -#include "sbenv.h" - -#include <vespa/log/log.h> -LOG_SETUP(".slobrok.server.cmd"); - -namespace slobrok { - -//----------------------------------------------------------------------------- - -struct ScriptData { - SBEnv &env; - const std::string name; - const std::string spec; - FRT_RPCRequest * const registerRequest; - - enum { - RDC_INIT, XCH_WANTADD, CHK_RPCSRV, XCH_DOADD, XCH_IGNORE, RDC_INVAL - } _state; - - ScriptData(SBEnv &e, const std::string &n, const std::string &s, FRT_RPCRequest *r) - : env(e), name(n), spec(s), registerRequest(r), _state(RDC_INIT) - {} -}; - -//----------------------------------------------------------------------------- - -const std::string & -ScriptCommand::name() { return _data->name; } - -const std::string & -ScriptCommand::spec() { return _data->spec; } - -ScriptCommand::ScriptCommand(std::unique_ptr<ScriptData> data) - : _data(std::move(data)) -{} - -ScriptCommand::ScriptCommand(ScriptCommand &&) = default; -ScriptCommand& -ScriptCommand::operator= (ScriptCommand &&) = default; -ScriptCommand::~ScriptCommand() = default; - -ScriptCommand -ScriptCommand::makeRegRpcSrvCmd(SBEnv &env, - const std::string &name, const std::string &spec, - FRT_RPCRequest *req) -{ - return ScriptCommand(std::make_unique<ScriptData>(env, name, spec, req)); -} - -ScriptCommand -ScriptCommand::makeIgnoreCmd(SBEnv &env, const std::string & name, const std::string &spec) -{ - auto data = std::make_unique<ScriptData>(env, name, spec, nullptr); - data->_state = ScriptData::XCH_IGNORE; - return ScriptCommand(std::move(data)); -} - -ScriptCommand -ScriptCommand::makeRegCompleter(SBEnv &env, - const std::string &name, const std::string &spec, - FRT_RPCRequest *req) -{ - auto data = std::make_unique<ScriptData>(env, name, spec, req); - data->_state = ScriptData::XCH_DOADD; - return ScriptCommand(std::move(data)); -} - -void -ScriptCommand::doRequest() -{ - LOG_ASSERT(_data->_state == ScriptData::RDC_INIT); - doneHandler(OkState()); -} - -void cleanupReservation(ScriptData & data) -{ - RpcServerMap &map = data.env.rpcServerMap(); - const ReservedName *rsvp = map.getReservation(data.name.c_str()); - if (rsvp != nullptr && rsvp->isLocal) { - map.removeReservation(data.name.c_str()); - } -} - -void -ScriptCommand::doneHandler(OkState result) -{ - LOG_ASSERT(_data); - std::unique_ptr<ScriptData> dataUP = std::move(_data); - LOG_ASSERT(! _data); - ScriptData & data = *dataUP; - const char *name_p = data.name.c_str(); - const char *spec_p = data.spec.c_str(); - ExchangeManager &xch = data.env.exchangeManager(); - RpcServerManager &rsm = data.env.rpcServerManager(); - - if (result.failed()) { - LOG(warning, "failed [%s->%s] in state %d: %s", name_p, spec_p, data._state, result.errorMsg.c_str()); - if (data._state != ScriptData::XCH_IGNORE) { - cleanupReservation(data); - } - // XXX should handle different state errors differently? - if (data.registerRequest != nullptr) { - data.registerRequest->SetError(FRTE_RPC_METHOD_FAILED, result.errorMsg.c_str()); - data.registerRequest->Return(); - } else { - LOG(warning, "ignored: %s", result.errorMsg.c_str()); - } - return; - } - if (data._state == ScriptData::RDC_INIT) { - LOG(spam, "phase wantAdd(%s,%s)", name_p, spec_p); - data._state = ScriptData::XCH_WANTADD; - xch.wantAdd(std::move(dataUP)); - return; - } else if (data._state == ScriptData::XCH_WANTADD) { - LOG(spam, "phase addManaged(%s,%s)", name_p, spec_p); - data._state = ScriptData::CHK_RPCSRV; - rsm.addManaged(std::move(dataUP)); - return; - } else if (data._state == ScriptData::CHK_RPCSRV) { - LOG(spam, "phase doAdd(%s,%s)", name_p, spec_p); - data._state = ScriptData::XCH_DOADD; - xch.doAdd(std::move(dataUP)); - return; - } else if (data._state == ScriptData::XCH_DOADD) { - LOG(debug, "done doAdd(%s,%s)", name_p, spec_p); - data._state = ScriptData::RDC_INVAL; - // all OK - if (data.registerRequest != nullptr) { - data.registerRequest->Return(); - } - cleanupReservation(data); - return; - } else if (data._state == ScriptData::XCH_IGNORE) { - return; - } - // no other state should be possible - LOG_ABORT("should not be reached"); -} - -//----------------------------------------------------------------------------- - -} // namespace slobrok diff --git a/slobrok/src/vespa/slobrok/server/cmd.h b/slobrok/src/vespa/slobrok/server/cmd.h deleted file mode 100644 index e7f42f75e42..00000000000 --- a/slobrok/src/vespa/slobrok/server/cmd.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "ok_state.h" -#include <memory> - -class FRT_RPCRequest; - -namespace slobrok { - -class SBEnv; -struct ScriptData; - -class ScriptCommand -{ -private: - std::unique_ptr<ScriptData> _data; - ScriptCommand(std::unique_ptr<ScriptData> data); -public: - const std::string &name(); - const std::string &spec(); - - ScriptCommand(ScriptCommand &&); - ScriptCommand& operator= (ScriptCommand &&); - ~ScriptCommand(); - - static ScriptCommand makeRegRpcSrvCmd(SBEnv &env, const std::string &name, const std::string &spec, FRT_RPCRequest *req); - static ScriptCommand makeIgnoreCmd(SBEnv &env, const std::string &name, const std::string &spec); - static ScriptCommand makeRegCompleter(SBEnv &env, const std::string &name, const std::string &spec, FRT_RPCRequest *req); - - void doneHandler(OkState result); - void doRequest(); -}; - -} // namespace slobrok diff --git a/slobrok/src/vespa/slobrok/server/exchange_manager.cpp b/slobrok/src/vespa/slobrok/server/exchange_manager.cpp index 94e951ca252..89167842c1b 100644 --- a/slobrok/src/vespa/slobrok/server/exchange_manager.cpp +++ b/slobrok/src/vespa/slobrok/server/exchange_manager.cpp @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "exchange_manager.h" -#include "rpc_server_map.h" #include "sbenv.h" #include <vespa/fnet/frt/supervisor.h> #include <vespa/vespalib/util/overload.h> @@ -14,11 +13,9 @@ namespace slobrok { //----------------------------------------------------------------------------- -ExchangeManager::ExchangeManager(SBEnv &env, RpcServerMap &rpcsrvmap) +ExchangeManager::ExchangeManager(SBEnv &env) : _partners(), - _env(env), - _rpcsrvmanager(env.rpcServerManager()), - _rpcsrvmap(rpcsrvmap) + _env(env) { } @@ -69,35 +66,13 @@ ExchangeManager::getPartnerList() void ExchangeManager::forwardRemove(const std::string & name, const std::string & spec) { - WorkPackage *package = new WorkPackage(WorkPackage::OP_REMOVE, *this, - ScriptCommand::makeIgnoreCmd(_env, name, spec)); + WorkPackage *package = new WorkPackage(WorkPackage::OP_REMOVE, ServiceMapping{name, spec}, *this); for (const auto & entry : _partners) { package->addItem(entry.second.get()); } package->expedite(); } -void -ExchangeManager::doAdd(ScriptCommand rdc) -{ - WorkPackage *package = new WorkPackage(WorkPackage::OP_DOADD, *this, std::move(rdc)); - - for (const auto & entry : _partners) { - package->addItem(entry.second.get()); - } - package->expedite(); -} - - -void -ExchangeManager::wantAdd(ScriptCommand rdc) -{ - WorkPackage *package = new WorkPackage(WorkPackage::OP_WANTADD, *this, std::move(rdc)); - for (const auto & entry : _partners) { - package->addItem(entry.second.get()); - } - package->expedite(); -} RemoteSlobrok * ExchangeManager::lookupPartner(const std::string & name) const { @@ -128,22 +103,8 @@ void ExchangeManager::healthCheck() { auto newWorldList = env().consensusMap().currentConsensus(); - if (! _env.useNewLogic()) { - auto oldWorldServices = env().rpcServerMap().allManaged(); - ServiceMappingList oldWorldList; - for (const auto *nsp : oldWorldServices) { - oldWorldList.emplace_back(nsp->getName(), nsp->getSpec()); - } - std::sort(oldWorldList.begin(), oldWorldList.end()); - vespalib::string diff = diffLists(oldWorldList, newWorldList); - if (! diff.empty()) { - LOG(warning, "Diff from old world rpcServerMap to new world consensus map: %s", - diff.c_str()); - } - } for (const auto & [ name, partner ] : _partners) { partner->maybeStartFetch(); - partner->maybePushMine(); auto remoteList = partner->remoteMap().allMappings(); // 0 is expected (when remote is down) if (remoteList.size() != 0) { @@ -205,14 +166,12 @@ ExchangeManager::WorkPackage::WorkItem::~WorkItem() } -ExchangeManager::WorkPackage::WorkPackage(op_type op, - ExchangeManager &exchanger, - ScriptCommand script) +ExchangeManager::WorkPackage::WorkPackage(op_type op, const ServiceMapping &mapping, ExchangeManager &exchanger) : _work(), _doneCnt(0), _numDenied(0), - _script(std::move(script)), _exchanger(exchanger), + _mapping(mapping), _optype(op) { } @@ -230,9 +189,9 @@ ExchangeManager::WorkPackage::doneItem(bool denied) (int)_doneCnt, (int)_work.size(), (int)_numDenied); if (_doneCnt == _work.size()) { if (_numDenied > 0) { - _script.doneHandler(OkState(_numDenied, "denied by remote")); - } else { - _script.doneHandler(OkState()); + LOG(debug, "work package [%s->%s]: %zd/%zd denied by remote", + _mapping.name.c_str(), _mapping.spec.c_str(), + _numDenied, _doneCnt); } delete this; } @@ -245,18 +204,12 @@ ExchangeManager::WorkPackage::addItem(RemoteSlobrok *partner) if (! partner->isConnected()) { return; } - const char *name_p = _script.name().c_str(); - const char *spec_p = _script.spec().c_str(); + const char *name_p = _mapping.name.c_str(); + const char *spec_p = _mapping.spec.c_str(); FRT_RPCRequest *r = _exchanger._env.getSupervisor()->AllocRPCRequest(); - // XXX should recheck rpcsrvmap again - if (_optype == OP_REMOVE) { - r->SetMethodName("slobrok.internal.doRemove"); - } else if (_optype == OP_WANTADD) { - r->SetMethodName("slobrok.internal.wantAdd"); - } else if (_optype == OP_DOADD) { - r->SetMethodName("slobrok.internal.doAdd"); - } + LOG_ASSERT(_optype == OP_REMOVE); + r->SetMethodName("slobrok.internal.doRemove"); r->GetParams()->AddString(_exchanger._env.mySpec().c_str()); r->GetParams()->AddString(name_p); r->GetParams()->AddString(spec_p); @@ -274,7 +227,6 @@ ExchangeManager::WorkPackage::expedite() size_t sz = _work.size(); if (sz == 0) { // no remotes need doing. - _script.doneHandler(OkState()); delete this; return; } diff --git a/slobrok/src/vespa/slobrok/server/exchange_manager.h b/slobrok/src/vespa/slobrok/server/exchange_manager.h index 18a20274a75..6891686c94d 100644 --- a/slobrok/src/vespa/slobrok/server/exchange_manager.h +++ b/slobrok/src/vespa/slobrok/server/exchange_manager.h @@ -2,7 +2,6 @@ #pragma once #include "ok_state.h" -#include "cmd.h" #include "remote_slobrok.h" #include <deque> @@ -14,8 +13,6 @@ namespace slobrok { //----------------------------------------------------------------------------- class SBEnv; -class RpcServerMap; -class RpcServerManager; //----------------------------------------------------------------------------- @@ -61,37 +58,31 @@ private: std::vector<std::unique_ptr<WorkItem>> _work; size_t _doneCnt; size_t _numDenied; - ScriptCommand _script; public: ExchangeManager &_exchanger; - enum op_type { OP_NOP, OP_WANTADD, OP_DOADD, OP_REMOVE }; - op_type _optype; + enum op_type { OP_REMOVE }; + const ServiceMapping _mapping; + const op_type _optype; void addItem(RemoteSlobrok *partner); void doneItem(bool denied); void expedite(); WorkPackage(const WorkPackage&) = delete; WorkPackage& operator= (const WorkPackage&) = delete; - WorkPackage(op_type op, - ExchangeManager &exchanger, - ScriptCommand donehandler); + WorkPackage(op_type op, const ServiceMapping &mapping, ExchangeManager &exchanger); ~WorkPackage(); }; SBEnv &_env; - RpcServerManager &_rpcsrvmanager; - RpcServerMap &_rpcsrvmap; vespalib::string diffLists(const ServiceMappingList &lhs, const ServiceMappingList &rhs); public: ExchangeManager(const ExchangeManager &) = delete; ExchangeManager &operator=(const ExchangeManager &) = delete; - ExchangeManager(SBEnv &env, RpcServerMap &rpcsrvmap); + ExchangeManager(SBEnv &env); ~ExchangeManager(); SBEnv &env() { return _env; } - RpcServerManager &rpcServerManager() { return _rpcsrvmanager; } - RpcServerMap &rpcServerMap() { return _rpcsrvmap; } OkState addPartner(const std::string & spec); void removePartner(const std::string & spec); @@ -99,9 +90,6 @@ public: void forwardRemove(const std::string & name, const std::string & spec); - void wantAdd(ScriptCommand rdc); - void doAdd(ScriptCommand rdc); - RemoteSlobrok *lookupPartner(const std::string & name) const; void healthCheck(); }; diff --git a/slobrok/src/vespa/slobrok/server/local_rpc_monitor_map.h b/slobrok/src/vespa/slobrok/server/local_rpc_monitor_map.h index 96c0ee03245..173a0455e43 100644 --- a/slobrok/src/vespa/slobrok/server/local_rpc_monitor_map.h +++ b/slobrok/src/vespa/slobrok/server/local_rpc_monitor_map.h @@ -1,8 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include "cmd.h" -#include "managed_rpc_server.h" #include "map_listener.h" #include "map_source.h" #include "mapping_monitor.h" @@ -12,6 +10,8 @@ #include "service_map_history.h" #include "service_mapping.h" +#include <vespa/fnet/task.h> + #include <vector> #include <memory> #include <map> diff --git a/slobrok/src/vespa/slobrok/server/remote_check.cpp b/slobrok/src/vespa/slobrok/server/remote_check.cpp index 157b959dbfe..da4d1ebc3dd 100644 --- a/slobrok/src/vespa/slobrok/server/remote_check.cpp +++ b/slobrok/src/vespa/slobrok/server/remote_check.cpp @@ -2,8 +2,6 @@ #include "remote_check.h" #include "named_service.h" -#include "rpc_server_map.h" -#include "rpc_server_manager.h" #include "remote_slobrok.h" #include "random.h" #include "exchange_manager.h" @@ -13,12 +11,9 @@ LOG_SETUP(".slobrok.server.remote_check"); namespace slobrok { -RemoteCheck::RemoteCheck(FNET_Scheduler *sched, - RpcServerMap& rpcsrvmap, - RpcServerManager& rpcsrvman, - ExchangeManager& exch) +RemoteCheck::RemoteCheck(FNET_Scheduler *sched, ExchangeManager& exch) : FNET_Task(sched), - _rpcsrvmap(rpcsrvmap), _rpcsrvmanager(rpcsrvman), _exchanger(exch) + _exchanger(exch) { double seconds = randomIn(15.3, 27.9); Schedule(seconds); diff --git a/slobrok/src/vespa/slobrok/server/remote_check.h b/slobrok/src/vespa/slobrok/server/remote_check.h index e0cf89c177d..11eb85401fe 100644 --- a/slobrok/src/vespa/slobrok/server/remote_check.h +++ b/slobrok/src/vespa/slobrok/server/remote_check.h @@ -20,17 +20,12 @@ class ExchangeManager; class RemoteCheck : public FNET_Task { private: - RpcServerMap &_rpcsrvmap; - RpcServerManager &_rpcsrvmanager; ExchangeManager &_exchanger; RemoteCheck(const RemoteCheck &); // Not used RemoteCheck &operator=(const RemoteCheck &); // Not used public: - explicit RemoteCheck(FNET_Scheduler *sched, - RpcServerMap& rpcsrvmap, - RpcServerManager& rpcsrvman, - ExchangeManager& exchanger); + explicit RemoteCheck(FNET_Scheduler *sched, ExchangeManager& exchanger); ~RemoteCheck(); private: void PerformTask() override; diff --git a/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp b/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp index d867d955dac..4b308e90e37 100644 --- a/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp +++ b/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp @@ -1,8 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "remote_slobrok.h" -#include "rpc_server_map.h" -#include "rpc_server_manager.h" #include "exchange_manager.h" #include "sbenv.h" #include <vespa/fnet/frt/supervisor.h> @@ -18,7 +16,6 @@ namespace slobrok { RemoteSlobrok::RemoteSlobrok(const std::string &name, const std::string &spec, ExchangeManager &manager) : _exchanger(manager), - _rpcsrvmanager(manager.rpcServerManager()), _remote(nullptr), _serviceMapMirror(), _rpcserver(name, spec, *this), @@ -26,11 +23,7 @@ RemoteSlobrok::RemoteSlobrok(const std::string &name, const std::string &spec, _failCnt(0), _consensusSubscription(MapSubscription::subscribe(_serviceMapMirror, _exchanger.env().consensusMap())), _remAddPeerReq(nullptr), - _remListReq(nullptr), - _remAddReq(nullptr), - _remRemReq(nullptr), - _remFetchReq(nullptr), - _pending() + _remFetchReq(nullptr) { _rpcserver.healthCheck(); } @@ -38,8 +31,6 @@ RemoteSlobrok::RemoteSlobrok(const std::string &name, const std::string &spec, void RemoteSlobrok::shutdown() { _reconnecter.disable(); - _pending.clear(); - if (_remote != nullptr) { _remote->SubRef(); _remote = nullptr; @@ -51,15 +42,6 @@ void RemoteSlobrok::shutdown() { if (_remAddPeerReq != nullptr) { _remAddPeerReq->Abort(); } - if (_remListReq != nullptr) { - _remListReq->Abort(); - } - if (_remAddReq != nullptr) { - _remAddReq->Abort(); - } - if (_remRemReq != nullptr) { - _remRemReq->Abort(); - } _serviceMapMirror.clear(); } @@ -68,39 +50,6 @@ RemoteSlobrok::~RemoteSlobrok() { // _rpcserver destructor called automatically } -void -RemoteSlobrok::doPending() -{ - if (_remAddReq != nullptr) return; - if (_remRemReq != nullptr) return; - - if (_remote == nullptr) return; - - if ( ! _pending.empty() ) { - std::unique_ptr<NamedService> todo = std::move(_pending.front()); - _pending.pop_front(); - - const NamedService *rpcsrv = _exchanger.rpcServerMap().lookup(todo->getName()); - - if (rpcsrv == nullptr) { - _remRemReq = getSupervisor()->AllocRPCRequest(); - _remRemReq->SetMethodName("slobrok.internal.doRemove"); - _remRemReq->GetParams()->AddString(_exchanger.env().mySpec().c_str()); - _remRemReq->GetParams()->AddString(todo->getName().c_str()); - _remRemReq->GetParams()->AddString(todo->getSpec().c_str()); - _remote->InvokeAsync(_remRemReq, 2.0, this); - } else { - _remAddReq = getSupervisor()->AllocRPCRequest(); - _remAddReq->SetMethodName("slobrok.internal.doAdd"); - _remAddReq->GetParams()->AddString(_exchanger.env().mySpec().c_str()); - _remAddReq->GetParams()->AddString(todo->getName().c_str()); - _remAddReq->GetParams()->AddString(rpcsrv->getSpec().c_str()); - _remote->InvokeAsync(_remAddReq, 2.0, this); - } - // XXX should save this and pick up on RequestDone() - } -} - void RemoteSlobrok::maybeStartFetch() { if (_remFetchReq != nullptr) return; if (_remote == nullptr) return; @@ -168,21 +117,6 @@ void RemoteSlobrok::handleFetchResult() { } } - - -void -RemoteSlobrok::pushMine() -{ - // all mine - std::vector<const NamedService *> mine = _exchanger.rpcServerMap().allManaged(); - while (mine.size() > 0) { - const NamedService *now = mine.back(); - mine.pop_back(); - _pending.push_back(std::make_unique<NamedService>(now->getName(), now->getSpec())); - } - doPending(); -} - void RemoteSlobrok::RequestDone(FRT_RPCRequest *req) { @@ -190,7 +124,6 @@ RemoteSlobrok::RequestDone(FRT_RPCRequest *req) handleFetchResult(); return; } - FRT_Values &answer = *(req->GetReturn()); if (req == _remAddPeerReq) { // handle response after asking remote slobrok to add me as a peer: if (req->IsError()) { @@ -201,96 +134,15 @@ RemoteSlobrok::RequestDone(FRT_RPCRequest *req) myname, myspec, getName().c_str(), getSpec().c_str(), req->GetErrorMessage()); req->SubRef(); _remAddPeerReq = nullptr; - goto retrylater; + fail(); + return; } req->SubRef(); _remAddPeerReq = nullptr; - // next step is to ask the remote to send its list of managed names: - LOG_ASSERT(_remListReq == nullptr); - _remListReq = getSupervisor()->AllocRPCRequest(); - _remListReq->SetMethodName("slobrok.internal.listManagedRpcServers"); - if (_remote != nullptr) { - _remote->InvokeAsync(_remListReq, 3.0, this); - } - // when _remListReq is returned, our managed list is added - } else if (req == _remListReq) { - // handle the list sent from the remote: - if (req->IsError() - || strcmp(answer.GetTypeString(), "SS") != 0) - { - LOG(error, "error listing remote slobrok %s at %s: %s", - getName().c_str(), getSpec().c_str(), req->GetErrorMessage()); - req->SubRef(); - _remListReq = nullptr; - goto retrylater; - } - uint32_t numNames = answer.GetValue(0)._string_array._len; - uint32_t numSpecs = answer.GetValue(1)._string_array._len; - - if (numNames != numSpecs) { - LOG(error, "inconsistent array lengths from %s at %s", getName().c_str(), getSpec().c_str()); - req->SubRef(); - _remListReq = nullptr; - goto retrylater; - } - FRT_StringValue *names = answer.GetValue(0)._string_array._pt; - FRT_StringValue *specs = answer.GetValue(1)._string_array._pt; - - for (uint32_t idx = 0; idx < numNames; idx++) { - _rpcsrvmanager.addRemote(names[idx]._str, specs[idx]._str); - } - req->SubRef(); - _remListReq = nullptr; - - // next step is to push the ones I own: - maybeStartFetch(); - maybePushMine(); - } else if (req == _remAddReq) { - // handle response after pushing some name that we managed: - if (req->IsError() && (req->GetErrorCode() == FRTE_RPC_CONNECTION || - req->GetErrorCode() == FRTE_RPC_TIMEOUT)) - { - LOG(error, "connection error adding to remote slobrok: %s", req->GetErrorMessage()); - req->SubRef(); - _remAddReq = nullptr; - goto retrylater; - } - if (req->IsError()) { - FRT_Values &args = *req->GetParams(); - const char *rpcsrvname = args[1]._string._str; - const char *rpcsrvspec = args[2]._string._str; - LOG(warning, "error adding [%s -> %s] to remote slobrok: %s", - rpcsrvname, rpcsrvspec, req->GetErrorMessage()); - _rpcsrvmanager.removeLocal(rpcsrvname, rpcsrvspec); - } - req->SubRef(); - _remAddReq = nullptr; - doPending(); - } else if (req == _remRemReq) { - // handle response after pushing some remove we had pending: - if (req->IsError() && (req->GetErrorCode() == FRTE_RPC_CONNECTION || - req->GetErrorCode() == FRTE_RPC_TIMEOUT)) - { - LOG(error, "connection error adding to remote slobrok: %s", req->GetErrorMessage()); - req->SubRef(); - _remRemReq = nullptr; - goto retrylater; - } - if (req->IsError()) { - LOG(warning, "error removing on remote slobrok: %s", req->GetErrorMessage()); - } - req->SubRef(); - _remRemReq = nullptr; - doPending(); } else { LOG(error, "got unknown request back in RequestDone()"); LOG_ASSERT(req == nullptr); } - - return; - retrylater: - fail(); - return; } @@ -321,25 +173,6 @@ RemoteSlobrok::fail() _reconnecter.scheduleTryConnect(); } - -void -RemoteSlobrok::maybePushMine() -{ - if (_remote != nullptr && - _remAddPeerReq == nullptr && - _remListReq == nullptr && - _remAddReq == nullptr && - _remRemReq == nullptr) - { - LOG(debug, "spamming remote at %s with my names", getName().c_str()); - pushMine(); - } else { - LOG(debug, "not pushing mine, as we have: remote %p r.a.p.r=%p r.l.r=%p r.a.r=%p r.r.r=%p", - _remote, _remAddPeerReq, _remListReq, _remAddReq, _remRemReq); - } -} - - void RemoteSlobrok::notifyOkRpcSrv(ManagedRpcServer *rpcsrv) { @@ -357,10 +190,7 @@ RemoteSlobrok::notifyOkRpcSrv(ManagedRpcServer *rpcsrv) _remote = getSupervisor()->GetTarget(getSpec().c_str()); maybeStartFetch(); - // at this point, we will do (in sequence): - // ask peer to connect to us too; - // ask peer for its list of managed rpcservers, adding to our database - // add our managed rpcserver on peer + // at this point, we will ask peer to connect to us too; // any failure will cause disconnect and retry. _remAddPeerReq = getSupervisor()->AllocRPCRequest(); @@ -368,7 +198,6 @@ RemoteSlobrok::notifyOkRpcSrv(ManagedRpcServer *rpcsrv) _remAddPeerReq->GetParams()->AddString(_exchanger.env().mySpec().c_str()); _remAddPeerReq->GetParams()->AddString(_exchanger.env().mySpec().c_str()); _remote->InvokeAsync(_remAddPeerReq, 3.0, this); - // when _remAddPeerReq is returned, our managed list is added via doAdd() } void diff --git a/slobrok/src/vespa/slobrok/server/remote_slobrok.h b/slobrok/src/vespa/slobrok/server/remote_slobrok.h index b980aa90de0..ef7f39c08ed 100644 --- a/slobrok/src/vespa/slobrok/server/remote_slobrok.h +++ b/slobrok/src/vespa/slobrok/server/remote_slobrok.h @@ -2,7 +2,6 @@ #pragma once #include "ok_state.h" -#include "cmd.h" #include "i_rpc_server_manager.h" #include "managed_rpc_server.h" #include "service_map_mirror.h" @@ -12,7 +11,6 @@ namespace slobrok { //----------------------------------------------------------------------------- -class RpcServerManager; class ExchangeManager; //----------------------------------------------------------------------------- @@ -43,7 +41,6 @@ private: }; ExchangeManager &_exchanger; - RpcServerManager &_rpcsrvmanager; FRT_Target *_remote; ServiceMapMirror _serviceMapMirror; ManagedRpcServer _rpcserver; @@ -53,14 +50,8 @@ private: std::unique_ptr<MapSubscription> _consensusSubscription; FRT_RPCRequest *_remAddPeerReq; - FRT_RPCRequest *_remListReq; - FRT_RPCRequest *_remAddReq; - FRT_RPCRequest *_remRemReq; FRT_RPCRequest *_remFetchReq; - std::deque<std::unique_ptr<NamedService>> _pending; - void pushMine(); - void doPending(); void handleFetchResult(); public: @@ -72,7 +63,6 @@ public: void fail(); bool isConnected() const { return (_remote != nullptr); } void tryConnect(); - void maybePushMine(); void maybeStartFetch(); void invokeAsync(FRT_RPCRequest *req, double timeout, FRT_IRequestWait *rwaiter); const std::string & getName() const { return _rpcserver.getName(); } diff --git a/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp b/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp deleted file mode 100644 index a9a748323f7..00000000000 --- a/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "rpc_server_manager.h" -#include "reserved_name.h" -#include "rpc_server_map.h" -#include "remote_slobrok.h" -#include "sbenv.h" -#include <vespa/vespalib/util/stringfmt.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".slobrok.server.rpc_server_manager"); - -using vespalib::make_string_short::fmt; - -namespace slobrok { - -RpcServerManager::RpcServerManager(SBEnv &sbenv) - : FNET_Task(sbenv.getScheduler()), - _rpcsrvmap(sbenv.rpcServerMap()), - _exchanger(sbenv.exchangeManager()), - _env(sbenv), - _addManageds(), - _deleteList() -{ -} - -static OkState -validateName(const std::string & rpcsrvname) -{ - const char *p = rpcsrvname.c_str(); - while (*p != '\0') { - // important: disallow '*' - if (strchr("+,-./:=@[]_{}~<>" - "0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz", *p) == nullptr) - { - std::ostringstream tmp; - tmp << "Illegal character '" << *p << "' ("; - tmp << (int)(*p)<< ") in rpcserver name"; - return OkState(13, tmp.str().c_str()); - } - ++p; - } - if (p == rpcsrvname) { - return OkState(13, "empty rpcserver name"); - } - return OkState(); -} - - -OkState -RpcServerManager::checkPartner(const std::string & remslobrok) -{ - if (remslobrok == _env.mySpec()) { - return OkState(13, "remote slobrok using my rpcserver name"); - } - const RemoteSlobrok *partner = _exchanger.lookupPartner(remslobrok); - if (partner == nullptr) { - return OkState(13, "remote slobrok not a partner"); - } - return OkState(); -} - -OkState -RpcServerManager::addRemReservation(const std::string & remslobrok, const std::string & name, const std::string &spec) -{ - OkState state = checkPartner(remslobrok); - if (state.failed()) return state; - - OkState valid = validateName(name); - if (valid.failed()) return valid; - - const NamedService *old = _rpcsrvmap.lookupManaged(name); - if (old != nullptr) { - if (old->getSpec() == spec) { - // was alright already - return OkState(0, "already registered"); - } - LOG(warning, "remote %s tried to register [%s -> %s] but we already have [%s -> %s] registered!", - remslobrok.c_str(), name.c_str(), spec.c_str(), old->getName().c_str(), old->getSpec().c_str()); - return OkState(FRTE_RPC_METHOD_FAILED, "already managed by me"); - } - if (_rpcsrvmap.conflictingReservation(name, spec)) { - return OkState(FRTE_RPC_METHOD_FAILED, "registration for name already in progress"); - } - _rpcsrvmap.addReservation(std::make_unique<ReservedName>(name, spec, false)); - return OkState(0, "done"); -} - - -OkState -RpcServerManager::addMyReservation(const std::string & name, const std::string & spec) -{ - OkState valid = validateName(name); - if (valid.failed()) return valid; - - const NamedService *old = _rpcsrvmap.lookupManaged(name); - if (old != nullptr) { - if (old->getSpec() == spec) { - // was alright already - return OkState(0, "already registered"); - } else { - return OkState(FRTE_RPC_METHOD_FAILED, fmt("name %s registered (to %s), cannot register %s", - name.c_str(), old->getSpec().c_str(), spec.c_str())); - } - } - - // check if we already are in the progress of adding this - if (_rpcsrvmap.conflictingReservation(name, spec)) { - const ReservedName * rsv = _rpcsrvmap.getReservation(name); - LOG(warning, "conflicting registrations: wanted [%s -> %s] but [%s -> %s] already reserved", - name.c_str(), spec.c_str(), rsv->getName().c_str(), rsv->getSpec().c_str()); - return OkState(FRTE_RPC_METHOD_FAILED, - "registration for name already in progress with a different spec"); - } - _rpcsrvmap.removeReservation(name); - _rpcsrvmap.addReservation(std::make_unique<ReservedName>(name, spec, true)); - return OkState(0, "done"); -} - - -OkState -RpcServerManager::addRemote(const std::string & name, const std::string &spec) -{ - OkState valid = validateName(name); - if (valid.failed()) return valid; - - if (alreadyManaged(name, spec)) { - return OkState(0, "already correct"); - } - const NamedService *old = _rpcsrvmap.lookup(name); - if (old != nullptr) { - if (old->getSpec() != spec) { - LOG(warning, "collision on remote add: name %s registered to %s locally, " - "but another location broker wants it registered to %s", - name.c_str(), old->getSpec().c_str(), spec.c_str()); - removeRemote(name, old->getSpec()); - return OkState(13, "registered, with different spec"); - } - // was alright already, remove reservation - _rpcsrvmap.removeReservation(name); - return OkState(0, "already correct"); - } - _rpcsrvmap.removeReservation(name); - auto rpcsrv = std::make_unique<ManagedRpcServer>(name, spec, *this); - _rpcsrvmap.addNew(std::move(rpcsrv)); - return OkState(0, "done"); -} - -OkState -RpcServerManager::remove(ManagedRpcServer *rpcsrv) -{ - const NamedService *td = _rpcsrvmap.lookup(rpcsrv->getName()); - if (td == rpcsrv) { - return removeLocal(rpcsrv->getName(), rpcsrv->getSpec()); - } else { - return OkState(1, "not currently registered"); - } -} - - -OkState -RpcServerManager::removeRemote(const std::string &name, const std::string &spec) -{ - const NamedService *old = _rpcsrvmap.lookup(name); - if (old == nullptr) { - // was alright already, remove any reservation too - _rpcsrvmap.removeReservation(name); - return OkState(0, "already done"); - } - if (old->getSpec() != spec) { - return OkState(1, "name registered, but with different spec"); - } - std::unique_ptr<NamedService> td = _rpcsrvmap.remove(name); - LOG_ASSERT(td.get() == old); - return OkState(0, "done"); -} - -OkState -RpcServerManager::removeLocal(const std::string & name, const std::string &spec) -{ - const NamedService *td = _rpcsrvmap.lookup(name); - if (td == nullptr) { - // already removed, nop - return OkState(); - } - - const RemoteSlobrok *partner = _exchanger.lookupPartner(name); - if (partner != nullptr) { - return OkState(13, "cannot unregister partner slobrok"); - } - - const ManagedRpcServer *rpcsrv = _rpcsrvmap.lookupManaged(name); - if (rpcsrv == nullptr) { - return OkState(13, "not a local rpcserver"); - } - - if (rpcsrv->getSpec() != spec) { - // the client can probably ignore this "error" - // or log it on level INFO? - return OkState(1, fmt("name registered, but with different spec (%s)", rpcsrv->getSpec().c_str())); - } - auto tdUP = _rpcsrvmap.remove(name); - LOG_ASSERT(tdUP.get() == rpcsrv); - _exchanger.forwardRemove(name, spec); - return OkState(); -} - - -void -RpcServerManager::addManaged(ScriptCommand rdc) -{ - const std::string &name = rdc.name(); - const std::string &spec = rdc.spec(); - auto newRpcServer = std::make_unique<ManagedRpcServer>(name, spec, *this); - ManagedRpcServer & rpcsrv = *newRpcServer; - _rpcsrvmap.addNew(std::move(newRpcServer)); - for (size_t i = 0; i < _addManageds.size(); i++) { - if (_addManageds[i].rpcsrv == nullptr) { - _addManageds[i].rpcsrv = &rpcsrv; - _addManageds[i].handler = std::move(rdc); - rpcsrv.healthCheck(); - return; - } - } - _addManageds.emplace_back(&rpcsrv, std::move(rdc)); - rpcsrv.healthCheck(); - return; -} - - - -bool -RpcServerManager::alreadyManaged(const std::string &name, const std::string &spec) -{ - const ManagedRpcServer *rpcsrv = _rpcsrvmap.lookupManaged(name); - if (rpcsrv != nullptr) { - if (rpcsrv->getSpec() == spec) { - return true; - } - } - return false; -} - - -RpcServerManager::~RpcServerManager() -{ - Kill(); - PerformTask(); -} - - -void -RpcServerManager::PerformTask() -{ - std::vector<std::unique_ptr<NamedService>> deleteAfterSwap; - std::swap(deleteAfterSwap, _deleteList); -} - - -void -RpcServerManager::notifyFailedRpcSrv(ManagedRpcServer *rpcsrv, std::string errmsg) -{ - _env.countFailedHeartbeat(); - const auto &name = rpcsrv->getName(); - const auto &spec = rpcsrv->getSpec(); - const char *namep = name.c_str(); - const char *specp = spec.c_str(); - std::unique_ptr<NamedService> toDelete; - const NamedService *old = _rpcsrvmap.lookup(rpcsrv->getName()); - if (old == rpcsrv) { - toDelete = _rpcsrvmap.remove(name); - LOG_ASSERT(toDelete.get() == rpcsrv); - LOG(info, "managed server %s at %s failed: %s", namep, specp, errmsg.c_str()); - } else { - // only managed servers should exist, this is bad: - LOG(error, "unmanaged server %s at %s failed: %s", namep, specp, errmsg.c_str()); - } - _exchanger.forwardRemove(name, spec); - for (size_t i = 0; i < _addManageds.size(); ++i) { - if (_addManageds[i].rpcsrv == rpcsrv) { - LOG(warning, "rpcserver %s at %s failed while trying to register", namep, specp); - _addManageds[i].rpcsrv = nullptr; - _addManageds[i].handler.doneHandler(OkState(13, "failed check using listNames callback")); - } - } - if (toDelete) { - _deleteList.push_back(std::move(toDelete)); - ScheduleNow(); - } -} - -void -RpcServerManager::notifyOkRpcSrv(ManagedRpcServer *rpcsrv) -{ - for (size_t i = 0; i < _addManageds.size(); ++i) { - if (_addManageds[i].rpcsrv == rpcsrv) { - _addManageds[i].handler.doneHandler(OkState()); - _addManageds[i].rpcsrv = 0; - } - } - // XXX check if pending wantAdd / doAdd / registerRpcServer -} - -FRT_Supervisor * -RpcServerManager::getSupervisor() -{ - return _env.getSupervisor(); -} - -//----------------------------------------------------------------------------- - -} // namespace slobrok diff --git a/slobrok/src/vespa/slobrok/server/rpc_server_manager.h b/slobrok/src/vespa/slobrok/server/rpc_server_manager.h deleted file mode 100644 index 15b674388e3..00000000000 --- a/slobrok/src/vespa/slobrok/server/rpc_server_manager.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "ok_state.h" -#include "cmd.h" -#include "i_rpc_server_manager.h" -#include "named_service.h" -#include <vespa/fnet/task.h> -#include <vector> -#include <memory> - -namespace slobrok { - -class NamedService; -class ManagedRpcServer; -class RemoteSlobrok; -class ReservedName; -class RpcServerMap; -class ExchangeManager; -class SBEnv; - -/** - * @class RpcServerManager - * @brief Main "business logic" for the service location broker. - * - * Used by all external and some internal operations. - * This class actually implements operations, - * checking for validity, manipulating internal datastructures, - * and initiating synchronization operations to peer slobroks. - **/ -class RpcServerManager : public FNET_Task, - public IRpcServerManager -{ -private: - RpcServerMap &_rpcsrvmap; - ExchangeManager &_exchanger; - SBEnv &_env; - - struct MRSandRRSC { - ManagedRpcServer *rpcsrv; - ScriptCommand handler; - MRSandRRSC(ManagedRpcServer *d, ScriptCommand h) - : rpcsrv(d), handler(std::move(h)) {} - }; - std::vector<MRSandRRSC> _addManageds; - std::vector<std::unique_ptr<NamedService>> _deleteList; -public: - OkState checkPartner(const std::string & remslobrok); - - OkState addRemote(const std::string & name, const std::string & spec); - - OkState addRemReservation(const std::string & remslobrok, const std::string & name, const std::string & spec); - OkState addMyReservation(const std::string & name, const std::string & spec); - - bool alreadyManaged(const std::string & name, const std::string & spec); - void addManaged(ScriptCommand rdc); - - OkState remove(ManagedRpcServer *rpcsrv); - - OkState removeLocal(const std::string & name, const std::string & spec); - OkState removeRemote(const std::string & name, const std::string & spec); - - RpcServerManager(const RpcServerManager &) = delete; - RpcServerManager &operator=(const RpcServerManager &) = delete; - RpcServerManager(SBEnv &sbenv); - ~RpcServerManager(); - - void PerformTask() override; - void notifyFailedRpcSrv(ManagedRpcServer *rpcsrv, std::string errmsg) override; - void notifyOkRpcSrv(ManagedRpcServer *rpcsrv) override; - FRT_Supervisor *getSupervisor() override; -}; - -//----------------------------------------------------------------------------- - -} // namespace slobrok - diff --git a/slobrok/src/vespa/slobrok/server/rpc_server_map.cpp b/slobrok/src/vespa/slobrok/server/rpc_server_map.cpp deleted file mode 100644 index fcaaf57570c..00000000000 --- a/slobrok/src/vespa/slobrok/server/rpc_server_map.cpp +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "rpc_server_map.h" -#include "reserved_name.h" -#include "rpc_server_manager.h" -#include "sbenv.h" - -#include <vespa/log/log.h> -LOG_SETUP(".slobrok.server.rpc_server_map"); - -namespace slobrok { - -//----------------------------------------------------------------------------- - -ManagedRpcServer * -RpcServerMap::lookupManaged(const std::string & name) const { - auto found = _myrpcsrv_map.find(name); - return (found == _myrpcsrv_map.end()) ? nullptr : found->second.get(); -} - -const NamedService * -RpcServerMap::lookup(const std::string & name) const -{ - return lookupManaged(name); -} - -std::unique_ptr<NamedService> -RpcServerMap::remove(const std::string & name) -{ - auto service = std::move(_myrpcsrv_map[name]); - auto spec = service->getSpec(); - _proxy.remove(ServiceMapping{name, spec}); - _myrpcsrv_map.erase(name); - return service; -} - -std::vector<const NamedService *> -RpcServerMap::allManaged() const -{ - std::vector<const NamedService *> retval; - // get list of all names in myrpcsrv_map - for (const auto & entry : _myrpcsrv_map) { - retval.push_back(entry.second.get()); - } - return retval; -} - - -void -RpcServerMap::add(NamedService *rpcsrv) -{ - const std::string &name = rpcsrv->getName(); - - LOG_ASSERT(rpcsrv != nullptr); - LOG_ASSERT(_myrpcsrv_map.find(name) == _myrpcsrv_map.end()); - - removeReservation(name); - _proxy.add(ServiceMapping{name, rpcsrv->getSpec()}); -} - -void -RpcServerMap::addNew(std::unique_ptr<ManagedRpcServer> rpcsrv) -{ - const std::string &name = rpcsrv->getName(); - - auto oldman = std::move(_myrpcsrv_map[name]); - _myrpcsrv_map.erase(name); - - if (oldman) { - const ReservedName *oldres = _reservations[name].get(); - const std::string &spec = rpcsrv->getSpec(); - _proxy.remove(ServiceMapping{name, spec}); - const std::string &oldname = oldman->getName(); - const std::string &oldspec = oldman->getSpec(); - if (spec != oldspec) { - LOG(warning, "internal state problem: adding [%s at %s] but already had [%s at %s]", - name.c_str(), spec.c_str(), oldname.c_str(), oldspec.c_str()); - if (oldres != nullptr) { - const std::string &n = oldres->getName(); - const std::string &s = oldres->getSpec(); - LOG(warning, "old reservation: [%s at %s]", n.c_str(), s.c_str()); - } - } - } - add(rpcsrv.get()); - _myrpcsrv_map[name] = std::move(rpcsrv); -} - - -void -RpcServerMap::addReservation(std::unique_ptr<ReservedName> rpcsrv) -{ - LOG_ASSERT(rpcsrv != nullptr); - LOG_ASSERT(_myrpcsrv_map.find(rpcsrv->getName()) == _myrpcsrv_map.end()); - - // must not be reserved for something else already - // this should have been checked already, so assert - LOG_ASSERT(! conflictingReservation(rpcsrv->getName(), rpcsrv->getSpec())); - auto old = std::move(_reservations[rpcsrv->getName()]); - LOG_ASSERT(!old - || old->getSpec() == rpcsrv->getSpec() - || ! old->stillReserved()); - _reservations[rpcsrv->getName()] = std::move(rpcsrv); -} - - -/** check if there is a (different) registration for this name in progress */ -bool -RpcServerMap::conflictingReservation(const std::string &name, const std::string &spec) -{ - const ReservedName *resv = _reservations[name].get(); - return (resv != nullptr && - resv->stillReserved() && - resv->getSpec() != spec); -} - -const ReservedName * -RpcServerMap::getReservation(const std::string &name) const { - auto found = _reservations.find(name); - return (found == _reservations.end()) ? nullptr : found->second.get(); -} - -RpcServerMap::RpcServerMap() = default; - -RpcServerMap::~RpcServerMap() = default; - -void -RpcServerMap::removeReservation(const std::string & name) -{ - _reservations.erase(name); -} - -} // namespace slobrok diff --git a/slobrok/src/vespa/slobrok/server/rpc_server_map.h b/slobrok/src/vespa/slobrok/server/rpc_server_map.h deleted file mode 100644 index 3d2999069ea..00000000000 --- a/slobrok/src/vespa/slobrok/server/rpc_server_map.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "named_service.h" -#include "service_map_history.h" -#include "proxy_map_source.h" - -#include <memory> -#include <string> -#include <unordered_map> - -namespace slobrok { - -class NamedService; -class ManagedRpcServer; -class ReservedName; - -/** - * @class RpcServerMap - * @brief Contains the actual collections of NamedService (and subclasses) - * objects known by this location broker. - * - * Works as a collection of NamedService objects, but actually contains - * three seperate hashmaps. - **/ - -class RpcServerMap -{ -private: - using ManagedRpcServerMap = std::unordered_map<std::string, std::unique_ptr<ManagedRpcServer>>; - using ReservedNameMap = std::unordered_map<std::string, std::unique_ptr<ReservedName>>; - ManagedRpcServerMap _myrpcsrv_map; - ReservedNameMap _reservations; - ProxyMapSource _proxy; - - void add(NamedService *rpcsrv); - -public: - typedef std::vector<const NamedService *> RpcSrvlist; - - MapSource &proxy() { return _proxy; } - - ManagedRpcServer *lookupManaged(const std::string & name) const; - - const NamedService * lookup(const std::string & name) const; - RpcSrvlist allManaged() const; - - void addNew(std::unique_ptr<ManagedRpcServer> rpcsrv); - std::unique_ptr<NamedService> remove(const std::string & name); - - void addReservation(std::unique_ptr<ReservedName>rpcsrv); - bool conflictingReservation(const std::string & name, const std::string &spec); - - const ReservedName *getReservation(const std::string & name) const; - void removeReservation(const std::string & name); - - RpcServerMap(const RpcServerMap &) = delete; - RpcServerMap &operator=(const RpcServerMap &) = delete; - RpcServerMap(); - ~RpcServerMap(); -}; - -//----------------------------------------------------------------------------- - -} // namespace slobrok - diff --git a/slobrok/src/vespa/slobrok/server/rpchooks.cpp b/slobrok/src/vespa/slobrok/server/rpchooks.cpp index 540060210ed..6ce24be9201 100644 --- a/slobrok/src/vespa/slobrok/server/rpchooks.cpp +++ b/slobrok/src/vespa/slobrok/server/rpchooks.cpp @@ -4,8 +4,6 @@ #include "ok_state.h" #include "named_service.h" #include "request_completion_handler.h" -#include "rpc_server_map.h" -#include "rpc_server_manager.h" #include "remote_slobrok.h" #include "sbenv.h" #include "rpcmirror.h" @@ -38,12 +36,31 @@ public: ~MetricsReport() override { Kill(); } }; +bool match(const char *name, const char *pattern) { + LOG_ASSERT(name != nullptr); + LOG_ASSERT(pattern != nullptr); + while (*pattern != '\0') { + if (*name == *pattern) { + ++name; + ++pattern; + } else if (*pattern == '*') { + ++pattern; + while (*name != '/' && *name != '\0') { + ++name; + } + } else { + return false; + } + } + return (*name == *pattern); +} + } // namespace <unnamed> //----------------------------------------------------------------------------- -RPCHooks::RPCHooks(SBEnv &env, RpcServerMap& rpcsrvmap, RpcServerManager& rpcsrvman) - : _env(env), _rpcsrvmap(rpcsrvmap), _rpcsrvmanager(rpcsrvman), +RPCHooks::RPCHooks(SBEnv &env) + : _env(env), _globalHistory(env.globalHistory()), _localHistory(env.localHistory()), _cnts(Metrics::zero()), @@ -65,25 +82,6 @@ void RPCHooks::reportMetrics() { EV_COUNT("other_reqs", _cnts.otherReqs); } -bool RPCHooks::match(const char *name, const char *pattern) { - LOG_ASSERT(name != nullptr); - LOG_ASSERT(pattern != nullptr); - while (*pattern != '\0') { - if (*name == *pattern) { - ++name; - ++pattern; - } else if (*pattern == '*') { - ++pattern; - while (*name != '/' && *name != '\0') { - ++name; - } - } else { - return false; - } - } - return (*name == *pattern); -} - void RPCHooks::initRPC(FRT_Supervisor *supervisor) { _m_reporter = std::make_unique<MetricsReport>(supervisor, *this); @@ -229,11 +227,6 @@ void RPCHooks::initRPC(FRT_Supervisor *supervisor) { //------------------------------------------------------------------------- } - -bool RPCHooks::useNewLogic() const { - return _env.useNewLogic(); -} - void RPCHooks::rpc_listNamesServed(FRT_RPCRequest *req) { FRT_Values &dst = *req->GetReturn(); FRT_StringValue *names = dst.AddStringArray(1); @@ -257,9 +250,6 @@ void RPCHooks::rpc_registerRpcServer(FRT_RPCRequest *req) { } req->Detach(); _env.localMonitorMap().addLocal(mapping, std::make_unique<RequestCompletionHandler>(req)); - // TODO: remove this - auto script = ScriptCommand::makeRegRpcSrvCmd(_env, dName, dSpec, nullptr); - script.doRequest(); return; } diff --git a/slobrok/src/vespa/slobrok/server/rpchooks.h b/slobrok/src/vespa/slobrok/server/rpchooks.h index b68eb9007a8..bd051df64f1 100644 --- a/slobrok/src/vespa/slobrok/server/rpchooks.h +++ b/slobrok/src/vespa/slobrok/server/rpchooks.h @@ -40,8 +40,6 @@ public: private: SBEnv &_env; - RpcServerMap &_rpcsrvmap; - RpcServerManager &_rpcsrvmanager; ServiceMapHistory &_globalHistory; ServiceMapHistory &_localHistory; @@ -49,44 +47,33 @@ private: std::unique_ptr<FNET_Task> _m_reporter; public: - RPCHooks(SBEnv &env, RpcServerMap& rpcsrvmap, RpcServerManager& rpcsrvman); + RPCHooks(SBEnv &env); ~RPCHooks() override; - static bool match(const char *name, const char *pattern); - void initRPC(FRT_Supervisor *supervisor); void reportMetrics(); const Metrics& getMetrics() const { return _cnts; } void countFailedHeartbeat() { _cnts.heartBeatFails++; } private: - bool useNewLogic() const; - - void rpc_lookupRpcServer(FRT_RPCRequest *req); - - void new_registerRpcServer(FRT_RPCRequest *req); - void new_unregisterRpcServer(FRT_RPCRequest *req); - void new_wantAdd(FRT_RPCRequest *req); - void new_doRemove(FRT_RPCRequest *req); - void new_doAdd(FRT_RPCRequest *req); - void rpc_registerRpcServer(FRT_RPCRequest *req); void rpc_unregisterRpcServer(FRT_RPCRequest *req); - void rpc_addPeer(FRT_RPCRequest *req); void rpc_removePeer(FRT_RPCRequest *req); + void rpc_incrementalFetch(FRT_RPCRequest *req); + void rpc_doRemove(FRT_RPCRequest *req); + void rpc_fetchLocalView(FRT_RPCRequest *req); + void rpc_listNamesServed(FRT_RPCRequest *req); + + /** for unit tests and debugging, consider removing some of these: */ + void rpc_lookupRpcServer(FRT_RPCRequest *req); void rpc_listManagedRpcServers(FRT_RPCRequest *req); void rpc_lookupManaged(FRT_RPCRequest *req); void rpc_listAllRpcServers(FRT_RPCRequest *req); - void rpc_incrementalFetch(FRT_RPCRequest *req); void rpc_wantAdd(FRT_RPCRequest *req); void rpc_doAdd(FRT_RPCRequest *req); - void rpc_doRemove(FRT_RPCRequest *req); - void rpc_fetchLocalView(FRT_RPCRequest *req); - - void rpc_listNamesServed(FRT_RPCRequest *req); - void rpc_getRpcServerHistory(FRT_RPCRequest *req); + /** consider removing: */ void rpc_stop(FRT_RPCRequest *req); void rpc_version(FRT_RPCRequest *req); }; diff --git a/slobrok/src/vespa/slobrok/server/sbenv.cpp b/slobrok/src/vespa/slobrok/server/sbenv.cpp index 42debe1556c..1d279994cb0 100644 --- a/slobrok/src/vespa/slobrok/server/sbenv.cpp +++ b/slobrok/src/vespa/slobrok/server/sbenv.cpp @@ -97,19 +97,16 @@ ConfigTask::PerformTask() } // namespace slobrok::<unnamed> -SBEnv::SBEnv(const ConfigShim &shim) : SBEnv(shim, true) {} - -SBEnv::SBEnv(const ConfigShim &shim, bool) +SBEnv::SBEnv(const ConfigShim &shim) : _transport(std::make_unique<FNET_Transport>(TransportConfig().drop_empty_buffers(true))), _supervisor(std::make_unique<FRT_Supervisor>(_transport.get())), _configShim(shim), _configurator(shim.factory().create(*this)), _shuttingDown(false), - _useNewLogic(true), _partnerList(), _me(createSpec(_configShim.portNumber())), - _rpcHooks(*this, _rpcsrvmap, _rpcsrvmanager), - _remotechecktask(std::make_unique<RemoteCheck>(getSupervisor()->GetScheduler(), _rpcsrvmap, _rpcsrvmanager, _exchanger)), + _rpcHooks(*this), + _remotechecktask(std::make_unique<RemoteCheck>(getSupervisor()->GetScheduler(), _exchanger)), _health(), _metrics(_rpcHooks, *_transport), _components(), @@ -117,9 +114,7 @@ SBEnv::SBEnv(const ConfigShim &shim, bool) [this] (MappingMonitorOwner &owner) { return std::make_unique<RpcMappingMonitor>(*_supervisor, owner); }), - _rpcsrvmanager(*this), - _exchanger(*this, _rpcsrvmap), - _rpcsrvmap() + _exchanger(*this) { srandom(time(nullptr) ^ getpid()); // note: feedback loop between these two: @@ -201,7 +196,6 @@ SBEnv::MainLoop() return 0; } - void SBEnv::setup(const std::vector<std::string> &cfg) { @@ -272,7 +266,7 @@ SBEnv::removePeer(const std::string &name, const std::string &spec) if (partner == nullptr) { return OkState(0, "remote slobrok not a partner"); } - _exchanger.removePartner(name); + _exchanger.removePartner(spec); return OkState(0, "done"); } diff --git a/slobrok/src/vespa/slobrok/server/sbenv.h b/slobrok/src/vespa/slobrok/server/sbenv.h index c6fd8905131..09986e037a9 100644 --- a/slobrok/src/vespa/slobrok/server/sbenv.h +++ b/slobrok/src/vespa/slobrok/server/sbenv.h @@ -3,8 +3,6 @@ #include "named_service.h" #include "rpc_mapping_monitor.h" -#include "rpc_server_map.h" -#include "rpc_server_manager.h" #include "remote_slobrok.h" #include "exchange_manager.h" #include "configshim.h" @@ -44,7 +42,6 @@ private: ConfigShim _configShim; Configurator::UP _configurator; bool _shuttingDown; - const bool _useNewLogic; SBEnv(const SBEnv &); // Not used SBEnv &operator=(const SBEnv &); // Not used @@ -62,9 +59,7 @@ private: UnionServiceMap _consensusMap; ServiceMapHistory _globalVisibleHistory; - RpcServerManager _rpcsrvmanager; ExchangeManager _exchanger; - RpcServerMap _rpcsrvmap; std::unique_ptr<MapSubscription> _localMonitorSubscription; std::unique_ptr<MapSubscription> _consensusSubscription; @@ -72,7 +67,6 @@ private: public: explicit SBEnv(const ConfigShim &shim); - SBEnv(const ConfigShim &shim, bool useNewConsensusLogic); ~SBEnv(); FNET_Transport *getTransport() { return _transport.get(); } @@ -83,9 +77,7 @@ public: void suspend(); void resume(); - RpcServerManager& rpcServerManager() { return _rpcsrvmanager; } ExchangeManager& exchangeManager() { return _exchanger; } - RpcServerMap& rpcServerMap() { return _rpcsrvmap; } ServiceMapHistory& globalHistory() { return _globalVisibleHistory; @@ -107,12 +99,11 @@ public: bool isSuspended() const { return false; } bool isShuttingDown() const { return _shuttingDown; } - bool useNewLogic() const { return _useNewLogic; } int MainLoop(); - OkState addPeer(const std::string& name, const std::string &spec); - OkState removePeer(const std::string& name, const std::string &spec); + OkState addPeer(const std::string& name, const std::string& spec); + OkState removePeer(const std::string& name, const std::string& spec); void countFailedHeartbeat() { _rpcHooks.countFailedHeartbeat(); } }; diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp index 9ec4d31eb32..3cfb8d70b7d 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp @@ -50,7 +50,7 @@ DistributorBucketSpace::enumerate_available_nodes() _distribution_bits = _clusterState->getDistributionBitCount(); auto node_count = _clusterState->getNodeCount(lib::NodeType::STORAGE); if (_pending_cluster_state) { - _distribution_bits = std::min(_distribution_bits, _pending_cluster_state->getDistributionBitCount()); + _distribution_bits = std::max(_distribution_bits, _pending_cluster_state->getDistributionBitCount()); node_count = std::min(node_count, _pending_cluster_state->getNodeCount(lib::NodeType::STORAGE)); } std::vector<bool> nodes(node_count); diff --git a/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java b/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java index c536fc0f4cd..8704b11fdfb 100644 --- a/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java +++ b/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java @@ -3,12 +3,13 @@ package com.yahoo.vdslib.state; import com.yahoo.text.StringUtilities; import java.text.ParseException; -import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.StringTokenizer; -import java.util.TreeMap; -import java.util.TreeSet; /** * Be careful about changing this class, as it mirrors the ClusterState in C++. @@ -18,25 +19,183 @@ public class ClusterState implements Cloneable { private static final NodeState DEFAULT_STORAGE_UP_NODE_STATE = new NodeState(NodeType.STORAGE, State.UP); private static final NodeState DEFAULT_DISTRIBUTOR_UP_NODE_STATE = new NodeState(NodeType.DISTRIBUTOR, State.UP); + private static final NodeState DEFAULT_STORAGE_DOWN_NODE_STATE = new NodeState(NodeType.STORAGE, State.DOWN); + private static final NodeState DEFAULT_DISTRIBUTOR_DOWN_NODE_STATE = new NodeState(NodeType.DISTRIBUTOR, State.DOWN); - private int version = 0; - private State state = State.DOWN; - // nodeStates maps each of the non-up nodes that have an index <= the node count for its type. - private Map<Node, NodeState> nodeStates = new TreeMap<>(); + /** + * Maintains a bitset where all non-down nodes have a bit set. All nodes that differs from defaultUp + * and defaultDown are store explicit in a hash map. + */ + private static class Nodes { + private int logicalNodeCount; + private final NodeType type; + private final BitSet upNodes; + private final Map<Integer, NodeState> nodeStates = new HashMap<>(); + Nodes(NodeType type) { + this.type = type; + upNodes = new BitSet(); + } + Nodes(Nodes b) { + logicalNodeCount = b.logicalNodeCount; + type = b.type; + upNodes = (BitSet) b.upNodes.clone(); + b.nodeStates.forEach((key, value) -> nodeStates.put(key, value.clone())); + } + + void updateMaxIndex(int index) { + if (index > logicalNodeCount) { + upNodes.set(logicalNodeCount, index); + logicalNodeCount = index; + } + } + + int getLogicalNodeCount() { return logicalNodeCount; } + + NodeState getNodeState(int index) { + NodeState ns = nodeStates.get(index); + if (ns != null) return ns; + return (index >= getLogicalNodeCount() || ! upNodes.get(index)) + ? new NodeState(type, State.DOWN) + : new NodeState(type, State.UP); + } + + private void validateInput(Node node, NodeState ns) { + ns.verifyValidInSystemState(node.getType()); + if (node.getType() != type) { + throw new IllegalArgumentException("NodeType '" + node.getType() + "' differs from '" + type + "'"); + } + } + + void setNodeState(Node node, NodeState ns) { + validateInput(node, ns); + int index = node.getIndex(); + if (index >= logicalNodeCount) { + logicalNodeCount = index + 1; + } + setNodeStateInternal(index, ns); + } + + void addNodeState(Node node, NodeState ns) { + validateInput(node, ns); + int index = node.getIndex(); + updateMaxIndex(index + 1); + setNodeStateInternal(index, ns); + } + + private static boolean equalsWithDescription(NodeState a, NodeState b) { + // This is due to NodeState.equals considers semantic equality, and description is not part of that. + return a.equals(b) && ((a.getState() != State.DOWN) || a.getDescription().equals(b.getDescription())); + } + + private void setNodeStateInternal(int index, NodeState ns) { + nodeStates.remove(index); + if (ns.getState() == State.DOWN) { + upNodes.clear(index); + if ( ! equalsWithDescription(defaultDown(), ns)) { + nodeStates.put(index, ns); + } + } else { + upNodes.set(index); + if ( ! equalsWithDescription(defaultUp(), ns)) { + nodeStates.put(index, ns); + } + } + } + + boolean similarToImpl(Nodes other, final NodeStateCmp nodeStateCmp) { + // TODO verify behavior of C++ impl against this + if (logicalNodeCount != other.logicalNodeCount) return false; + if (type != other.type) return false; + if ( ! upNodes.equals(other.upNodes)) return false; + for (Integer node : unionNodeSetWith(other.nodeStates.keySet())) { + final NodeState lhs = nodeStates.get(node); + final NodeState rhs = other.nodeStates.get(node); + if (!nodeStateCmp.similar(type, lhs, rhs)) { + return false; + } + } + return true; + } + + private Set<Integer> unionNodeSetWith(final Set<Integer> otherNodes) { + final Set<Integer> unionNodeSet = new HashSet<>(nodeStates.keySet()); + unionNodeSet.addAll(otherNodes); + return unionNodeSet; + } + + @Override + public String toString() { return toString(false); } + + String toString(boolean verbose) { + StringBuilder sb = new StringBuilder(); + + int nodeCount = verbose ? getLogicalNodeCount() : upNodes.length(); + if ( nodeCount > 0 ) { + sb.append(type == NodeType.DISTRIBUTOR ? " distributor:" : " storage:").append(nodeCount); + for (int i = 0; i < nodeCount; i++) { + String nodeState = getNodeState(i).serialize(i, verbose); + if (!nodeState.isEmpty()) { + sb.append(' ').append(nodeState); + } + } + } + return sb.toString(); + } + + @Override + public boolean equals(Object obj) { + if (! (obj instanceof Nodes)) return false; + Nodes b = (Nodes) obj; + if (logicalNodeCount != b.logicalNodeCount) return false; + if (type != b.type) return false; + if (!upNodes.equals(b.upNodes)) return false; + if (!nodeStates.equals(b.nodeStates)) return false; + return true; + } - // TODO: Change to one count for distributor and one for storage, rather than an array - // TODO: RenameFunction, this is not the highest node count but the highest index - private ArrayList<Integer> nodeCount = new ArrayList<>(2); + @Override + public int hashCode() { + return Objects.hash(logicalNodeCount, type, nodeStates, upNodes); + } + private NodeState defaultDown() { + return type == NodeType.STORAGE + ? DEFAULT_STORAGE_DOWN_NODE_STATE + : DEFAULT_DISTRIBUTOR_DOWN_NODE_STATE; + } + private NodeState defaultUp() { + return defaultUpNodeState(type); + } + } + private int version = 0; + private State state = State.DOWN; private String description = ""; private int distributionBits = 16; + private final Nodes distributorNodes; + private final Nodes storageNodes; + public ClusterState(String serialized) throws ParseException { - nodeCount.add(0); - nodeCount.add(0); + distributorNodes = new Nodes(NodeType.DISTRIBUTOR); + storageNodes = new Nodes(NodeType.STORAGE); deserialize(serialized); } + public ClusterState(ClusterState b) { + version = b.version; + state = b.state; + description = b.description; + distributionBits = b.distributionBits; + distributorNodes = new Nodes(b.distributorNodes); + storageNodes = new Nodes(b.storageNodes); + } + + private Nodes getNodes(NodeType type) { + return (type == NodeType.STORAGE) + ? storageNodes + : (type == NodeType.DISTRIBUTOR) ? distributorNodes : null; + } + /** * Parse a given cluster state string into a returned ClusterState instance, wrapping any * parse exceptions in a RuntimeException. @@ -54,20 +213,7 @@ public class ClusterState implements Cloneable { } public ClusterState clone() { - try{ - ClusterState state = (ClusterState) super.clone(); - state.nodeStates = new TreeMap<>(); - for (Map.Entry<Node, NodeState> entry : nodeStates.entrySet()) { - state.nodeStates.put(entry.getKey(), entry.getValue().clone()); - } - state.nodeCount = new ArrayList<>(2); - state.nodeCount.add(nodeCount.get(0)); - state.nodeCount.add(nodeCount.get(1)); - return state; - } catch (CloneNotSupportedException e) { - assert(false); // Should never happen - return null; - } + return new ClusterState(this); } @Override @@ -77,8 +223,8 @@ public class ClusterState implements Cloneable { if (version != other.version || !state.equals(other.state) || distributionBits != other.distributionBits - || !nodeCount.equals(other.nodeCount) - || !nodeStates.equals(other.nodeStates)) + || !distributorNodes.equals(other.distributorNodes) + || !storageNodes.equals(other.storageNodes)) { return false; } @@ -87,7 +233,7 @@ public class ClusterState implements Cloneable { @Override public int hashCode() { - return java.util.Objects.hash(version, state, distributionBits, nodeCount, nodeStates); + return java.util.Objects.hash(version, state, distributionBits, distributorNodes, storageNodes); } @FunctionalInterface @@ -106,7 +252,7 @@ public class ClusterState implements Cloneable { return similarToImpl(other, this::normalizedNodeStateSimilarToIgnoringInitProgress); } - private boolean similarToImpl(final ClusterState other, final NodeStateCmp nodeStateCmp) { + private boolean similarToImpl(ClusterState other, final NodeStateCmp nodeStateCmp) { if (other == this) { return true; // We're definitely similar to ourselves. } @@ -119,23 +265,11 @@ public class ClusterState implements Cloneable { if (!metaInformationSimilarTo(other)) { return false; } - // TODO verify behavior of C++ impl against this - for (Node node : unionNodeSetWith(other.nodeStates.keySet())) { - final NodeState lhs = nodeStates.get(node); - final NodeState rhs = other.nodeStates.get(node); - if (!nodeStateCmp.similar(node.getType(), lhs, rhs)) { - return false; - } - } + if ( !distributorNodes.similarToImpl(other.distributorNodes, nodeStateCmp)) return false; + if ( !storageNodes.similarToImpl(other.storageNodes, nodeStateCmp)) return false; return true; } - private Set<Node> unionNodeSetWith(final Set<Node> otherNodes) { - final Set<Node> unionNodeSet = new TreeSet<>(nodeStates.keySet()); - unionNodeSet.addAll(otherNodes); - return unionNodeSet; - } - private boolean metaInformationSimilarTo(final ClusterState other) { if (version != other.version || !state.equals(other.state)) { return false; @@ -143,7 +277,7 @@ public class ClusterState implements Cloneable { if (distributionBits != other.distributionBits) { return false; } - return nodeCount.equals(other.nodeCount); + return true; } private boolean normalizedNodeStateSimilarTo(final NodeType nodeType, final NodeState lhs, final NodeState rhs) { @@ -178,12 +312,7 @@ public class ClusterState implements Cloneable { void addNodeState() throws ParseException { if (!empty) { NodeState ns = NodeState.deserialize(node.getType(), sb.toString()); - if (!ns.equals(defaultUpNodeState(node.getType()))) { - nodeStates.put(node, ns); - } - if (nodeCount.get(node.getType().ordinal()) <= node.getIndex()) { - nodeCount.set(node.getType().ordinal(), node.getIndex() + 1); - } + getNodes(node.getType()).addNodeState(node, ns); } empty = true; sb = new StringBuilder(); @@ -257,9 +386,7 @@ public class ClusterState implements Cloneable { } catch (Exception e) { throw new ParseException("Illegal node count '" + value + "' in state: " + serialized, 0); } - if (nodeCount > this.nodeCount.get(nodeType.ordinal())) { - this.nodeCount.set(nodeType.ordinal(), nodeCount); - } + getNodes(nodeType).updateMaxIndex(nodeCount); continue; } int dot2 = key.indexOf('.', dot + 1); @@ -269,8 +396,8 @@ public class ClusterState implements Cloneable { } else { node = new Node(nodeType, Integer.valueOf(key.substring(dot + 1, dot2))); } - if (node.getIndex() >= this.nodeCount.get(nodeType.ordinal())) { - throw new ParseException("Cannot index " + nodeType + " node " + node.getIndex() + " of " + this.nodeCount.get(nodeType.ordinal()) + " in state: " + serialized, 0); + if (node.getIndex() >= getNodeCount(nodeType)) { + throw new ParseException("Cannot index " + nodeType + " node " + node.getIndex() + " of " + getNodeCount(nodeType) + " in state: " + serialized, 0); } if (!nodeData.node.equals(node)) { nodeData.addNodeState(); @@ -289,7 +416,6 @@ public class ClusterState implements Cloneable { // Ignore unknown nodeStates } nodeData.addNodeState(); - removeLastNodesDownWithoutReason(); } public String getTextualDifference(ClusterState other) { @@ -363,7 +489,7 @@ public class ClusterState implements Cloneable { * E.g. if node X is down and without description, but nodex X-1 is up, then Y is 1. * The node count for distributors is then X + 1 - Y. */ - public int getNodeCount(NodeType type) { return nodeCount.get(type.ordinal()); } + public int getNodeCount(NodeType type) { return getNodes(type).getLogicalNodeCount(); } /** * Returns the state of a node. @@ -371,9 +497,7 @@ public class ClusterState implements Cloneable { * and DOWN otherwise. */ public NodeState getNodeState(Node node) { - if (node.getIndex() >= nodeCount.get(node.getType().ordinal())) - return new NodeState(node.getType(), State.DOWN); - return nodeStates.getOrDefault(node, new NodeState(node.getType(), State.UP)); + return getNodes(node.getType()).getNodeState(node.getIndex()); } /** @@ -383,35 +507,7 @@ public class ClusterState implements Cloneable { */ public void setNodeState(Node node, NodeState newState) { newState.verifyValidInSystemState(node.getType()); - if (node.getIndex() >= nodeCount.get(node.getType().ordinal())) { - for (int i= nodeCount.get(node.getType().ordinal()); i<node.getIndex(); ++i) { - nodeStates.put(new Node(node.getType(), i), new NodeState(node.getType(), State.DOWN)); - } - nodeCount.set(node.getType().ordinal(), node.getIndex() + 1); - } - if (newState.equals(new NodeState(node.getType(), State.UP))) { - nodeStates.remove(node); - } else { - nodeStates.put(node, newState); - } - if (newState.getState().equals(State.DOWN)) { - // We might be setting the last node down, so we can remove some states - removeLastNodesDownWithoutReason(); - } - } - - private void removeLastNodesDownWithoutReason() { - for (NodeType nodeType : NodeType.values()) { - for (int index = nodeCount.get(nodeType.ordinal()) - 1; index >= 0; --index) { - Node node = new Node(nodeType, index); - NodeState nodeState = nodeStates.get(node); - if (nodeState == null) break; // Node not existing is up - if ( ! nodeState.getState().equals(State.DOWN)) break; // Node not down can not be removed - if (nodeState.hasDescription()) break; // Node have reason to be down. Don't remove node as we will forget reason - nodeStates.remove(node); - nodeCount.set(nodeType.ordinal(), node.getIndex()); - } - } + getNodes(node.getType()).setNodeState(node, newState); } public String getDescription() { return description; } @@ -440,35 +536,9 @@ public class ClusterState implements Cloneable { sb.append(" bits:").append(distributionBits); } - int distributorNodeCount = getNodeCount(NodeType.DISTRIBUTOR); - int storageNodeCount = getNodeCount(NodeType.STORAGE); - // If not printing verbose, we're not printing descriptions, so we can remove tailing nodes that are down that has descriptions too - if (!verbose) { - while (distributorNodeCount > 0 && getNodeState(new Node(NodeType.DISTRIBUTOR, distributorNodeCount - 1)).getState().equals(State.DOWN)) --distributorNodeCount; - while (storageNodeCount > 0 && getNodeState(new Node(NodeType.STORAGE, storageNodeCount - 1)).getState().equals(State.DOWN)) --storageNodeCount; - } - if (distributorNodeCount > 0){ - sb.append(" distributor:").append(distributorNodeCount); - for (Map.Entry<Node, NodeState> entry : nodeStates.entrySet()) { - if (entry.getKey().getType().equals(NodeType.DISTRIBUTOR) && entry.getKey().getIndex() < distributorNodeCount) { - String nodeState = entry.getValue().serialize(entry.getKey().getIndex(), verbose); - if (!nodeState.isEmpty()) { - sb.append(' ').append(nodeState); - } - } - } - } - if (storageNodeCount > 0){ - sb.append(" storage:").append(storageNodeCount); - for (Map.Entry<Node, NodeState> entry : nodeStates.entrySet()) { - if (entry.getKey().getType().equals(NodeType.STORAGE) && entry.getKey().getIndex() < storageNodeCount) { - String nodeState = entry.getValue().serialize(entry.getKey().getIndex(), verbose); - if (!nodeState.isEmpty()) { - sb.append(' ').append(nodeState); - } - } - } - } + sb.append(distributorNodes.toString(verbose)); + sb.append(storageNodes.toString(verbose)); + if (sb.length() > 0) { // Remove first space if not empty sb.deleteCharAt(0); } diff --git a/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java b/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java index 0edf30bc1ff..80cf87b7597 100644 --- a/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java +++ b/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java @@ -51,6 +51,7 @@ public class NodeState implements Cloneable { public boolean equals(Object o) { if (!(o instanceof NodeState)) { return false; } NodeState ns = (NodeState) o; + // Note that 'description' is not considered as it carries semantics. if (state != ns.state || Math.abs(capacity - ns.capacity) > 0.0000000001 || Math.abs(initProgress - ns.initProgress) > 0.0000000001 diff --git a/vdslib/src/test/java/com/yahoo/vdslib/state/ClusterStateTestCase.java b/vdslib/src/test/java/com/yahoo/vdslib/state/ClusterStateTestCase.java index dbc8888cfda..956ea216a44 100644 --- a/vdslib/src/test/java/com/yahoo/vdslib/state/ClusterStateTestCase.java +++ b/vdslib/src/test/java/com/yahoo/vdslib/state/ClusterStateTestCase.java @@ -4,6 +4,7 @@ package com.yahoo.vdslib.state; import org.junit.Test; import java.text.ParseException; +import java.util.BitSet; import java.util.function.BiFunction; import static org.junit.Assert.assertEquals; @@ -311,6 +312,17 @@ public class ClusterStateTestCase{ } @Test + public void testBitSet() { + BitSet b = new BitSet(); + assertEquals(0, b.length()); + b.set(7); + b.set(107); + assertEquals(108, b.length()); + b.clear(107); + assertEquals(8, b.length()); + } + + @Test public void testVersionAndClusterStates() throws ParseException { ClusterState state = new ClusterState("version:4 cluster:i distributor:2 .1.s:i storage:2 .0.s:i .0.i:0.345"); assertEquals(4, state.getVersion()); diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index e5d5b8ba5b6..e68a37b15b6 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -1203,6 +1203,7 @@ "public com.yahoo.tensor.Tensor notEqual(com.yahoo.tensor.Tensor)", "public com.yahoo.tensor.Tensor approxEqual(com.yahoo.tensor.Tensor)", "public com.yahoo.tensor.Tensor bit(com.yahoo.tensor.Tensor)", + "public com.yahoo.tensor.Tensor hamming(com.yahoo.tensor.Tensor)", "public com.yahoo.tensor.Tensor avg()", "public com.yahoo.tensor.Tensor avg(java.lang.String)", "public com.yahoo.tensor.Tensor avg(java.util.List)", @@ -2189,6 +2190,22 @@ ], "fields": [] }, + "com.yahoo.tensor.functions.ScalarFunctions$Hamming": { + "superClass": "java.lang.Object", + "interfaces": [ + "java.util.function.DoubleBinaryOperator" + ], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>()", + "public static double hamming(double, double)", + "public double applyAsDouble(double, double)", + "public java.lang.String toString()" + ], + "fields": [] + }, "com.yahoo.tensor.functions.ScalarFunctions$LeakyRelu": { "superClass": "java.lang.Object", "interfaces": [ @@ -2557,6 +2574,7 @@ "public static java.util.function.DoubleBinaryOperator pow()", "public static java.util.function.DoubleBinaryOperator squareddifference()", "public static java.util.function.DoubleBinaryOperator subtract()", + "public static java.util.function.DoubleBinaryOperator hamming()", "public static java.util.function.DoubleUnaryOperator abs()", "public static java.util.function.DoubleUnaryOperator acos()", "public static java.util.function.DoubleUnaryOperator asin()", diff --git a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java index ab475e25387..3d4536d9249 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java @@ -36,6 +36,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import static com.yahoo.text.Ascii7BitMatcher.charsAndNumbers; +import static com.yahoo.tensor.functions.ScalarFunctions.Hamming; /** * A multidimensional array which can be used in computations. @@ -241,6 +242,7 @@ public interface Tensor { default Tensor notEqual(Tensor argument) { return join(argument, (a, b) -> ( a != b ? 1.0 : 0.0)); } default Tensor approxEqual(Tensor argument) { return join(argument, (a, b) -> ( approxEquals(a,b) ? 1.0 : 0.0)); } default Tensor bit(Tensor argument) { return join(argument, (a,b) -> ((int)b < 8 && (int)b >= 0 && ((int)a & (1 << (int)b)) != 0) ? 1.0 : 0.0); } + default Tensor hamming(Tensor argument) { return join(argument, (a,b) -> Hamming.hamming(a,b)); } default Tensor avg() { return avg(Collections.emptyList()); } default Tensor avg(String dimension) { return avg(Collections.singletonList(dimension)); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/ScalarFunctions.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/ScalarFunctions.java index 3ee9e67cdd6..d6fcd17b8fb 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/ScalarFunctions.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/ScalarFunctions.java @@ -33,6 +33,7 @@ public class ScalarFunctions { public static DoubleBinaryOperator pow() { return new Pow(); } public static DoubleBinaryOperator squareddifference() { return new SquaredDifference(); } public static DoubleBinaryOperator subtract() { return new Subtract(); } + public static DoubleBinaryOperator hamming() { return new Hamming(); } public static DoubleUnaryOperator abs() { return new Abs(); } public static DoubleUnaryOperator acos() { return new Acos(); } @@ -152,6 +153,26 @@ public class ScalarFunctions { public String toString() { return "f(a,b)(a - b)"; } } + + public static class Hamming implements DoubleBinaryOperator { + public static double hamming(double left, double right) { + double distance = 0; + byte a = (byte) left; + byte b = (byte) right; + for (int i = 0; i < 8; i++) { + byte bit = (byte) (1 << i); + if ((a & bit) != (b & bit)) { + distance += 1; + } + } + return distance; + } + @Override + public double applyAsDouble(double left, double right) { return hamming(left, right); } + @Override + public String toString() { return "f(a,b)(hamming(a,b))"; } + } + // Unary operators ------------------------------------------------------------------------------ |