diff options
33 files changed, 517 insertions, 232 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index e238303b58b..60b14e86f50 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -383,7 +383,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd verifyInControllerThread(); ClusterState baselineState = stateBundle.getBaselineClusterState(); newStates.add(stateBundle); - metricUpdater.updateClusterStateMetrics(cluster, baselineState); + metricUpdater.updateClusterStateMetrics(cluster, baselineState, + ResourceUsageStats.calculateFrom(cluster.getNodeInfo(), options.clusterFeedBlockLimit, stateBundle.getFeedBlock())); lastMetricUpdateCycleCount = cycleCount; systemStateBroadcaster.handleNewClusterStates(stateBundle); // Iff master, always store new version in ZooKeeper _before_ publishing to any @@ -399,7 +400,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if (cycleCount > 300 + lastMetricUpdateCycleCount) { ClusterStateBundle stateBundle = stateVersionTracker.getVersionedClusterStateBundle(); ClusterState baselineState = stateBundle.getBaselineClusterState(); - metricUpdater.updateClusterStateMetrics(cluster, baselineState); + metricUpdater.updateClusterStateMetrics(cluster, baselineState, + ResourceUsageStats.calculateFrom(cluster.getNodeInfo(), options.clusterFeedBlockLimit, stateBundle.getFeedBlock())); lastMetricUpdateCycleCount = cycleCount; return true; } else { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java index 650f7756bf9..40b10fe8145 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java @@ -32,7 +32,7 @@ public class MetricUpdater { + nodeCounts.getOrDefault(State.MAINTENANCE, 0); } - public void updateClusterStateMetrics(ContentCluster cluster, ClusterState state) { + public void updateClusterStateMetrics(ContentCluster cluster, ClusterState state, ResourceUsageStats resourceUsage) { Map<String, String> dimensions = new HashMap<>(); dimensions.put("cluster", cluster.getName()); for (NodeType type : NodeType.getTypes()) { @@ -59,6 +59,10 @@ public class MetricUpdater { dimensions.remove("node-type"); MetricReporter.Context context = createContext(dimensions); metricReporter.add("cluster-state-change", 1, context); + + metricReporter.set("resource_usage.max_disk_utilization", resourceUsage.getMaxDiskUtilization(), context); + metricReporter.set("resource_usage.max_memory_utilization", resourceUsage.getMaxMemoryUtilization(), context); + metricReporter.set("resource_usage.nodes_above_limit", resourceUsage.getNodesAboveLimit(), context); } public void updateMasterElectionMetrics(Map<Integer, Integer> data) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java index c730350310c..8d0a873a801 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java @@ -53,6 +53,13 @@ public class NodeResourceExhaustion { return String.format("%s (<= %.3g)", makeDescriptionPrefix(), limit); } + public String toShorthandDescription() { + return String.format("%s%s %.3g > %.3g", + resourceType, + (resourceUsage.getName() != null ? ":" + resourceUsage.getName() : ""), + resourceUsage.getUsage(), limit); + } + private String makeDescriptionPrefix() { return String.format("%s%s on node %d [%s]", resourceType, diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceUsageStats.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceUsageStats.java new file mode 100644 index 00000000000..aef5b1be468 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceUsageStats.java @@ -0,0 +1,89 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vespa.clustercontroller.core.hostinfo.ContentNode; + +import java.util.Collection; +import java.util.Map; +import java.util.Optional; + +/** + * Represents resource usage stats for the cluster that are exposed as metrics. + */ +public class ResourceUsageStats { + + // Max disk utilization (usage / limit) among all content nodes. + private final double maxDiskUtilization; + + // Max memory utilization (usage / limit) among all content nodes. + private final double maxMemoryUtilization; + + // The number of content nodes that are above at least one resource limit. + // When this is above zero feed is blocked in the cluster. + private final int nodesAboveLimit; + + private static final String diskResource = "disk"; + private static final String memoryResource = "memory"; + + public ResourceUsageStats() { + this.maxDiskUtilization = 0.0; + this.maxMemoryUtilization = 0.0; + this.nodesAboveLimit = 0; + } + + public ResourceUsageStats(double maxDiskUtilization, + double maxMemoryUtilization, + int nodesAboveLimit) { + this.maxDiskUtilization = maxDiskUtilization; + this.maxMemoryUtilization = maxMemoryUtilization; + this.nodesAboveLimit = nodesAboveLimit; + } + + public double getMaxDiskUtilization() { + return maxDiskUtilization; + } + + public double getMaxMemoryUtilization() { + return maxMemoryUtilization; + } + + public int getNodesAboveLimit() { + return nodesAboveLimit; + } + + public static ResourceUsageStats calculateFrom(Collection<NodeInfo> nodeInfos, + Map<String, Double> feedBlockLimits, + Optional<ClusterStateBundle.FeedBlock> feedBlock) { + double maxDiskUsage = 0.0; + double maxMemoryUsage = 0.0; + for (NodeInfo info : nodeInfos) { + if (info.isStorage()) { + var node = info.getHostInfo().getContentNode(); + maxDiskUsage = Double.max(maxDiskUsage, resourceUsageOf(diskResource, node)); + maxMemoryUsage = Double.max(maxMemoryUsage, resourceUsageOf(memoryResource, node)); + } + } + return new ResourceUsageStats(maxDiskUsage / limitOf(diskResource, feedBlockLimits), + maxMemoryUsage / limitOf(memoryResource, feedBlockLimits), + calculateNodesAboveLimit(feedBlock)); + } + + private static double resourceUsageOf(String type, ContentNode node) { + var result = node.resourceUsageOf(type); + return result.isPresent() ? result.get().getUsage() : 0.0; + } + + private static int calculateNodesAboveLimit(Optional<ClusterStateBundle.FeedBlock> feedBlock) { + if (!feedBlock.isPresent()) { + return 0; + } + var exhaustions = feedBlock.get().getConcreteExhaustions(); + return (int) exhaustions.stream().map(resource -> resource.node).distinct().count(); + } + + private static double limitOf(String type, Map<String, Double> limits) { + var result = limits.get(type); + return (result != null) ? result : 1.0; + } +} + diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java index ac4cb25a9c1..6d2f66d76d5 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java @@ -14,6 +14,7 @@ import java.util.List; import java.util.Map; import java.util.TimeZone; import java.util.TreeMap; +import java.util.stream.Collectors; /** * Renders web page with cluster status. @@ -205,6 +206,35 @@ public class VdsClusterHtmlRenderer { addRpcAddress(nodeInfo, row); table.addRow(row); + if (nodeType.equals(NodeType.STORAGE)) { + addFeedBlockedRowIfNodeIsBlocking(stateBundle, nodeInfo, row); + } + } + } + + private void addFeedBlockedRowIfNodeIsBlocking(ClusterStateBundle stateBundle, NodeInfo nodeInfo, HtmlTable.Row nodeRow) { + // We only show a feed block row if the node is actually blocking feed in the cluster, not + // just if limits have been exceeded (as feed block may be config disabled). + // O(n) but n expected to be 0-(very small number) in all realistic cases. + if (stateBundle.clusterFeedIsBlocked()) { + var exhaustions = stateBundle.getFeedBlockOrNull().getConcreteExhaustions().stream() + .filter(ex -> ex.node.getIndex() == nodeInfo.getNodeIndex()) + .collect(Collectors.toList()); + if (!exhaustions.isEmpty()) { + var exhaustionsDesc = exhaustions.stream() + .map(NodeResourceExhaustion::toShorthandDescription) + .collect(Collectors.joining(", ")); + + HtmlTable.Row feedBlockRow = new HtmlTable.Row(); + var contents = String.format("<strong>Node is blocking feed: %s</strong>", HtmlTable.escape(exhaustionsDesc)); + var cell = new HtmlTable.Cell(contents).addProperties(ERROR_PROPERTY); + cell.addProperties(new HtmlTable.CellProperties().setColSpan(18)); + feedBlockRow.addCell(cell); + table.addRow(feedBlockRow); + // Retroactively make the node index cell span 2 rows so it's obvious (hopefully) + // what node the feed block state is related to. + nodeRow.cells.get(0).addProperties(new HtmlTable.CellProperties().setRowSpan(2)); + } } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java index 650e4dc7888..2254435e629 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.ResourceUsage; import java.util.Arrays; @@ -12,6 +13,8 @@ import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode; + public class FeedBlockUtil { static class NodeAndUsages { @@ -90,5 +93,22 @@ public class FeedBlockUtil { return Arrays.stream(exhaustions).collect(Collectors.toCollection(LinkedHashSet::new)); } + static ClusterFixture createFixtureWithReportedUsages(NodeAndUsages... nodeAndUsages) { + var highestIndex = Arrays.stream(nodeAndUsages).mapToInt(u -> u.index).max(); + if (highestIndex.isEmpty()) { + throw new IllegalArgumentException("Can't have an empty cluster"); + } + var cf = ClusterFixture + .forFlatCluster(highestIndex.getAsInt() + 1) + .assignDummyRpcAddresses() + .bringEntireClusterUp(); + for (var nu : nodeAndUsages) { + cf.cluster().getNodeInfo(storageNode(nu.index)) + .setHostInfo(HostInfo.createHostInfo(createResourceUsageJson(nu.usages))); + } + return cf; + } + + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MetricReporterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MetricReporterTest.java index cf50c106b2e..68b5bf103a6 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MetricReporterTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MetricReporterTest.java @@ -39,20 +39,26 @@ public class MetricReporterTest { } } - private static HasMetricContext.Dimension[] withNodeTypeDimension(String type) { + private static HasMetricContext.Dimension[] withClusterDimension() { // Dimensions that are always present HasMetricContext.Dimension controllerDim = withDimension("controller-index", "0"); HasMetricContext.Dimension clusterDim = withDimension("cluster", "foo"); + return new HasMetricContext.Dimension[] { controllerDim, clusterDim }; + } + + private static HasMetricContext.Dimension[] withNodeTypeDimension(String type) { // Node type-specific dimension HasMetricContext.Dimension nodeType = withDimension("node-type", type); - return new HasMetricContext.Dimension[] { controllerDim, clusterDim, nodeType }; + var otherDims = withClusterDimension(); + return new HasMetricContext.Dimension[] { otherDims[0], otherDims[1], nodeType }; } @Test public void metrics_are_emitted_for_different_node_state_counts() { Fixture f = new Fixture(); f.metricUpdater.updateClusterStateMetrics(f.clusterFixture.cluster(), - ClusterState.stateFromString("distributor:10 .1.s:d storage:9 .1.s:d .2.s:m .4.s:d")); + ClusterState.stateFromString("distributor:10 .1.s:d storage:9 .1.s:d .2.s:m .4.s:d"), + new ResourceUsageStats()); verify(f.mockReporter).set(eq("cluster-controller.up.count"), eq(9), argThat(hasMetricContext(withNodeTypeDimension("distributor")))); @@ -68,7 +74,8 @@ public class MetricReporterTest { private void doTestRatiosInState(String clusterState, double distributorRatio, double storageRatio) { Fixture f = new Fixture(); - f.metricUpdater.updateClusterStateMetrics(f.clusterFixture.cluster(), ClusterState.stateFromString(clusterState)); + f.metricUpdater.updateClusterStateMetrics(f.clusterFixture.cluster(), ClusterState.stateFromString(clusterState), + new ResourceUsageStats()); verify(f.mockReporter).set(eq("cluster-controller.available-nodes.ratio"), doubleThat(closeTo(distributorRatio, 0.0001)), @@ -100,4 +107,24 @@ public class MetricReporterTest { doTestRatiosInState("distributor:10 storage:10 .0.s:m", 1.0, 1.0); } + @Test + public void metrics_are_emitted_for_resource_usage() { + Fixture f = new Fixture(); + f.metricUpdater.updateClusterStateMetrics(f.clusterFixture.cluster(), + ClusterState.stateFromString("distributor:10 storage:10"), + new ResourceUsageStats(0.5, 0.6, 5)); + + verify(f.mockReporter).set(eq("cluster-controller.resource_usage.max_disk_utilization"), + doubleThat(closeTo(0.5, 0.0001)), + argThat(hasMetricContext(withClusterDimension()))); + + verify(f.mockReporter).set(eq("cluster-controller.resource_usage.max_memory_utilization"), + doubleThat(closeTo(0.6, 0.0001)), + argThat(hasMetricContext(withClusterDimension()))); + + verify(f.mockReporter).set(eq("cluster-controller.resource_usage.nodes_above_limit"), + eq(5), + argThat(hasMetricContext(withClusterDimension()))); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index 54686919a7b..f5f7b4676d8 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -1,19 +1,13 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; -import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import org.junit.Test; -import java.util.Arrays; - import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode; -import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.NodeAndUsages; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createFixtureWithReportedUsages; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.forNode; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; -import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.setOf; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; -import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createResourceUsageJson; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -21,22 +15,6 @@ import static org.junit.Assert.assertTrue; public class ResourceExhaustionCalculatorTest { - private static ClusterFixture createFixtureWithReportedUsages(NodeAndUsages... nodeAndUsages) { - var highestIndex = Arrays.stream(nodeAndUsages).mapToInt(u -> u.index).max(); - if (highestIndex.isEmpty()) { - throw new IllegalArgumentException("Can't have an empty cluster"); - } - var cf = ClusterFixture - .forFlatCluster(highestIndex.getAsInt() + 1) - .assignDummyRpcAddresses() - .bringEntireClusterUp(); - for (var nu : nodeAndUsages) { - cf.cluster().getNodeInfo(storageNode(nu.index)) - .setHostInfo(HostInfo.createHostInfo(createResourceUsageJson(nu.usages))); - } - return cf; - } - @Test public void no_feed_block_returned_when_no_resources_lower_than_limit() { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceUsageStatsTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceUsageStatsTest.java new file mode 100644 index 00000000000..9eeb36265e0 --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceUsageStatsTest.java @@ -0,0 +1,76 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import org.junit.Test; + +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; + +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createFixtureWithReportedUsages; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.exhaustion; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.forNode; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.setOf; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; + +import static org.junit.Assert.assertEquals; + +public class ResourceUsageStatsTest { + + private final double DELTA = 0.00001; + + @Test + public void disk_and_memory_utilization_is_max_among_all_content_nodes() { + var stats = ResourceUsageStats.calculateFrom(createNodeInfo( + forNode(1, usage("disk", 0.3), usage("memory", 0.6)), + forNode(2, usage("disk", 0.4), usage("memory", 0.5))), + createFeedBlockLimits(0.8, 0.9), + Optional.empty()); + assertEquals(0.4 / 0.8, stats.getMaxDiskUtilization(), DELTA); + assertEquals(0.6 / 0.9, stats.getMaxMemoryUtilization(), DELTA); + } + + @Test + public void disk_and_memory_utilization_is_zero_if_no_samples_are_available() { + var stats = ResourceUsageStats.calculateFrom(createNodeInfo( + forNode(1), forNode(2)), + createFeedBlockLimits(0.8, 0.9), + Optional.empty()); + assertEquals(0.0, stats.getMaxDiskUtilization(), DELTA); + assertEquals(0.0, stats.getMaxMemoryUtilization(), DELTA); + } + + @Test + public void nodes_above_limit_is_zero_without_feed_block_status() { + var stats = ResourceUsageStats.calculateFrom(Collections.emptyList(), Collections.emptyMap(), Optional.empty()); + assertEquals(0, stats.getNodesAboveLimit()); + } + + @Test + public void nodes_above_limit_is_equal_to_node_resource_exhaustions() { + var stats = ResourceUsageStats.calculateFrom(Collections.emptyList(), Collections.emptyMap(), + createFeedBlock(exhaustion(1, "disk"), exhaustion(2, "memory"))); + assertEquals(2, stats.getNodesAboveLimit()); + } + + @Test + public void nodes_above_limit_counts_each_node_only_once() { + var stats = ResourceUsageStats.calculateFrom(Collections.emptyList(), Collections.emptyMap(), + createFeedBlock(exhaustion(1, "disk"), exhaustion(1, "memory"))); + assertEquals(1, stats.getNodesAboveLimit()); + } + + private static Collection<NodeInfo> createNodeInfo(FeedBlockUtil.NodeAndUsages... nodeAndUsages) { + return createFixtureWithReportedUsages(nodeAndUsages).cluster().getNodeInfo(); + } + + private static Map<String, Double> createFeedBlockLimits(double diskLimit, double memoryLimit) { + return Map.of("disk", diskLimit, "memory", memoryLimit); + } + + private static Optional<ClusterStateBundle.FeedBlock> createFeedBlock(NodeResourceExhaustion... exhaustions) { + return Optional.of(new ClusterStateBundle.FeedBlock(true, "", setOf(exhaustions))); + } +} + diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/expressiontransforms/TokenTransformer.java b/config-model/src/main/java/com/yahoo/searchdefinition/expressiontransforms/TokenTransformer.java index 192fb9baa9a..032341297bf 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/expressiontransforms/TokenTransformer.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/expressiontransforms/TokenTransformer.java @@ -114,14 +114,20 @@ public class TokenTransformer extends ExpressionTransformer<RankProfileTransform /** * Transforms a feature of the form * - * tokenTypeIds(128, a, ...) + * tokenTypeIds(128, a, b, ...) * * to an expression that generates a tensor that has values 0 for "a" * (including CLS and SEP tokens) and 1 for the rest of the sequence. * * Concretely, transforms to a tensor generation expression: * - * tensor(d0[1],d1[128])(if(d1 < length_a + 2, 0, 1)) + * tensor(d0[1],d1[128])( + * if (d1 < 1 + length_a + 1, + * 0, + * if (d1 < 1 + length_a + 1 + length_b + 1 + ..., + * 1, + * 0 + * ))) */ private ExpressionNode transformTokenTypeIds(ReferenceNode feature, RankProfileTransformContext context) { checkArguments(feature); @@ -131,11 +137,18 @@ public class TokenTransformer extends ExpressionTransformer<RankProfileTransform // we need to add functions calculating the token lengths of the arguments createTokenLengthFunctions(feature, context); - ReferenceNode arg = (ReferenceNode) feature.getArguments().expressions().get(1); - ExpressionNode argLength = new ReferenceNode(lengthFunctionName(arg)); - ExpressionNode lengthExpr = new ArithmeticNode(argLength, ArithmeticOperator.PLUS, TWO); - ComparisonNode comparison = new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, lengthExpr); - ExpressionNode expr = new IfNode(comparison, ZERO, ONE); + List<ExpressionNode> tokenSequence = createTokenSequence(feature); + ExpressionNode queryLengthExpr = createLengthExpr(2, tokenSequence); + ExpressionNode restLengthExpr = createLengthExpr(tokenSequence.size() - 1, tokenSequence); + ExpressionNode expr = new IfNode( + new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, queryLengthExpr), + ZERO, + new IfNode( + new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, restLengthExpr), + ONE, + ZERO + ) + ); return new TensorFunctionNode(Generate.bound(type, wrapScalar(expr))); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 22268bddf4d..219095ae41f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -241,6 +241,10 @@ public class VespaMetricSet { // DO NOT RELY ON THIS METRIC YET. metrics.add(new Metric("cluster-controller.node-event.count")); + metrics.add(new Metric("cluster-controller.resource_usage.nodes_above_limit.last")); + metrics.add(new Metric("cluster-controller.resource_usage.max_memory_utilization.last")); + metrics.add(new Metric("cluster-controller.resource_usage.max_disk_utilization.last")); + metrics.add(new Metric("reindexing.progress.last")); return metrics; diff --git a/config-model/src/test/integration/onnx-model/schemas/test.sd b/config-model/src/test/integration/onnx-model/schemas/test.sd index 5b440e80bed..4f45e0f6318 100644 --- a/config-model/src/test/integration/onnx-model/schemas/test.sd +++ b/config-model/src/test/integration/onnx-model/schemas/test.sd @@ -101,7 +101,7 @@ search test { rank-profile test_dynamic_model_with_transformer_tokens { function my_function() { - expression: tokenTypeIds(2, attribute(document_field)) + expression: tokenTypeIds(10, attribute(document_field), attribute(document_field)) } first-phase { expression: onnx(dynamic_model){d0:0,d1:1} diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithOnnxModelTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithOnnxModelTestCase.java index 73ff4ac3bcd..a3ad9f4f4ba 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithOnnxModelTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithOnnxModelTestCase.java @@ -152,7 +152,7 @@ public class RankingExpressionWithOnnxModelTestCase { assertEquals("test_dynamic_model_with_transformer_tokens", config.rankprofile(7).name()); assertEquals("rankingExpression(my_function).rankingScript", config.rankprofile(7).fef().property(1).name()); - assertEquals("tensor<float>(d0[1],d1[2])((if (d1 < rankingExpression(__token_length@-1993461420) + 2, 0, 1)))", config.rankprofile(7).fef().property(1).value()); + assertEquals("tensor<float>(d0[1],d1[10])((if (d1 < 1 + rankingExpression(__token_length@-1993461420) + 1, 0, if (d1 < 1 + rankingExpression(__token_length@-1993461420) + 1 + rankingExpression(__token_length@-1993461420) + 1, 1, 0))))", config.rankprofile(7).fef().property(1).value()); assertEquals("test_unbound_model", config.rankprofile(8).name()); assertEquals("rankingExpression(my_function).rankingScript", config.rankprofile(8).fef().property(0).name()); diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithTransformerTokensTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithTransformerTokensTestCase.java index 174e685b112..c64dbcdef03 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithTransformerTokensTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithTransformerTokensTestCase.java @@ -35,7 +35,7 @@ public class RankingExpressionWithTransformerTokensTestCase { @Test public void testTokenTypeIds() throws Exception { - String expected = "tensor(d0[1],d1[10]):[0,0,0,0,1,1,1,1,1,1]"; + String expected = "tensor(d0[1],d1[10]):[0,0,0,0,1,1,1,1,0,0]"; String a = "tensor(d0[2]):[1,2]"; String b = "tensor(d0[3]):[3,4,5]"; String expression = "tokenTypeIds(10, a, b)"; diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/DeploymentData.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/DeploymentData.java index 78f8197062c..702c6bd0f41 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/DeploymentData.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/application/v4/model/DeploymentData.java @@ -5,7 +5,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.athenz.api.AthenzDomain; -import com.yahoo.vespa.hosted.controller.api.integration.aws.ApplicationRoles; +import com.yahoo.vespa.hosted.controller.api.integration.aws.TenantRoles; import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateMetadata; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ContainerEndpoint; @@ -30,7 +30,7 @@ public class DeploymentData { private final Optional<EndpointCertificateMetadata> endpointCertificateMetadata; private final Optional<DockerImage> dockerImageRepo; private final Optional<AthenzDomain> athenzDomain; - private final Optional<ApplicationRoles> applicationRoles; + private final Optional<TenantRoles> tenantRoles; private final Quota quota; public DeploymentData(ApplicationId instance, ZoneId zone, byte[] applicationPackage, Version platform, @@ -38,7 +38,7 @@ public class DeploymentData { Optional<EndpointCertificateMetadata> endpointCertificateMetadata, Optional<DockerImage> dockerImageRepo, Optional<AthenzDomain> athenzDomain, - Optional<ApplicationRoles> applicationRoles, + Optional<TenantRoles> tenantRoles, Quota quota) { this.instance = requireNonNull(instance); this.zone = requireNonNull(zone); @@ -48,7 +48,7 @@ public class DeploymentData { this.endpointCertificateMetadata = requireNonNull(endpointCertificateMetadata); this.dockerImageRepo = requireNonNull(dockerImageRepo); this.athenzDomain = athenzDomain; - this.applicationRoles = applicationRoles; + this.tenantRoles = tenantRoles; this.quota = quota; } @@ -84,8 +84,8 @@ public class DeploymentData { return athenzDomain; } - public Optional<ApplicationRoles> applicationRoles() { - return applicationRoles; + public Optional<TenantRoles> tenantRoles() { + return tenantRoles; } public Quota quota() { diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/NoopRoleService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/NoopRoleService.java index 81fec1582d0..dceb56d14c1 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/NoopRoleService.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/NoopRoleService.java @@ -12,16 +12,11 @@ import java.util.Optional; public class NoopRoleService implements RoleService { @Override - public Optional<ApplicationRoles> createApplicationRoles(ApplicationId applicationId) { + public Optional<TenantRoles> createTenantRole(TenantName tenant) { return Optional.empty(); } @Override - public String createTenantRole(TenantName tenant) { - return ""; - } - - @Override public String createTenantPolicy(TenantName tenant, String policyName, String awsId, String role) { return ""; } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/RoleService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/RoleService.java index 93c86c406b4..3c04546f479 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/RoleService.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/RoleService.java @@ -11,9 +11,7 @@ import java.util.Optional; */ public interface RoleService { - Optional<ApplicationRoles> createApplicationRoles(ApplicationId applicationId); - - String createTenantRole(TenantName tenant); + Optional<TenantRoles> createTenantRole(TenantName tenant); String createTenantPolicy(TenantName tenant, String policyName, String awsId, String role); diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/ApplicationRoles.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/TenantRoles.java index de3e84ac0c3..4c5e3257d0e 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/ApplicationRoles.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/aws/TenantRoles.java @@ -4,11 +4,11 @@ package com.yahoo.vespa.hosted.controller.api.integration.aws; /** * @author mortent */ -public class ApplicationRoles { +public class TenantRoles { private final String hostRole; private final String containerRole; - public ApplicationRoles(String hostRole, String containerRole) { + public TenantRoles(String hostRole, String containerRole) { this.hostRole = hostRole; this.containerRole = containerRole; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index d064bb17b2c..c966a923139 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -17,10 +17,8 @@ import com.yahoo.vespa.athenz.api.AthenzPrincipal; import com.yahoo.vespa.athenz.api.AthenzService; import com.yahoo.vespa.athenz.api.AthenzUser; import com.yahoo.vespa.curator.Lock; -import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.StringFlag; import com.yahoo.vespa.hosted.controller.api.ActivateResult; @@ -30,7 +28,7 @@ import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbi import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.InstanceId; import com.yahoo.vespa.hosted.controller.api.identifiers.RevisionId; -import com.yahoo.vespa.hosted.controller.api.integration.aws.ApplicationRoles; +import com.yahoo.vespa.hosted.controller.api.integration.aws.TenantRoles; import com.yahoo.vespa.hosted.controller.api.integration.billing.BillingController; import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateMetadata; @@ -365,7 +363,7 @@ public class ApplicationController { try (Lock deploymentLock = lockForDeployment(job.application(), zone)) { Set<ContainerEndpoint> endpoints; Optional<EndpointCertificateMetadata> endpointCertificateMetadata; - Optional<ApplicationRoles> applicationRoles = Optional.empty(); + Optional<TenantRoles> tenantRoles = Optional.empty(); Run run = controller.jobController().last(job) .orElseThrow(() -> new IllegalStateException("No known run of '" + job + "'")); @@ -400,7 +398,7 @@ public class ApplicationController { } // Release application lock while doing the deployment, which is a lengthy task. // Carry out deployment without holding the application lock. - ActivateResult result = deploy(job.application(), applicationPackage, zone, platform, endpoints, endpointCertificateMetadata, applicationRoles); + ActivateResult result = deploy(job.application(), applicationPackage, zone, platform, endpoints, endpointCertificateMetadata, tenantRoles); // Record the quota usage for this application var quotaUsage = deploymentQuotaUsage(zone, job.application()); @@ -574,7 +572,7 @@ public class ApplicationController { private ActivateResult deploy(ApplicationId application, ApplicationPackage applicationPackage, ZoneId zone, Version platform, Set<ContainerEndpoint> endpoints, Optional<EndpointCertificateMetadata> endpointCertificateMetadata, - Optional<ApplicationRoles> applicationRoles) { + Optional<TenantRoles> tenantRoles) { try { Optional<DockerImage> dockerImageRepo = Optional.ofNullable( dockerImageRepoFlag @@ -599,7 +597,7 @@ public class ApplicationController { ConfigServer.PreparedApplication preparedApplication = configServer.deploy(new DeploymentData(application, zone, applicationPackage.zippedContent(), platform, endpoints, endpointCertificateMetadata, dockerImageRepo, domain, - applicationRoles, deploymentQuota)); + tenantRoles, deploymentQuota)); return new ActivateResult(new RevisionId(applicationPackage.hash()), preparedApplication.prepareResponse(), applicationPackage.zippedContent().length); diff --git a/jdisc_http_service/src/main/java/com/yahoo/container/logging/FileConnectionLog.java b/jdisc_http_service/src/main/java/com/yahoo/container/logging/FileConnectionLog.java index c28003c74da..7432c313286 100644 --- a/jdisc_http_service/src/main/java/com/yahoo/container/logging/FileConnectionLog.java +++ b/jdisc_http_service/src/main/java/com/yahoo/container/logging/FileConnectionLog.java @@ -5,19 +5,16 @@ package com.yahoo.container.logging; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; -import java.util.logging.Logger; - /** * @author mortent */ public class FileConnectionLog extends AbstractComponent implements ConnectionLog { - private static final Logger logger = Logger.getLogger(FileConnectionLog.class.getName()); private final ConnectionLogHandler logHandler; @Inject public FileConnectionLog(ConnectionLogConfig config) { - logHandler = new ConnectionLogHandler(config.cluster(), config.logDirectoryName(), config.queueSize(), new JsonConnectionLogWriter()); + logHandler = new ConnectionLogHandler(config.logDirectoryName(), config.cluster(), config.queueSize(), new JsonConnectionLogWriter()); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 00327dc0002..1ca8b5782b8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -8,19 +8,26 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.TenantName; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancers; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.Generation; import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.node.IP; +import com.yahoo.vespa.hosted.provision.node.NodeAcl; import com.yahoo.vespa.hosted.provision.node.Reports; import com.yahoo.vespa.hosted.provision.node.Status; import java.time.Instant; import java.util.Arrays; +import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.TreeSet; /** * A node in the node repository. The identity of a node is given by its id. @@ -429,6 +436,11 @@ public final class Node implements Nodelike { .deviation(); } + /** Returns the ACL for the node (trusted nodes, networks and ports) */ + public NodeAcl acl(NodeList allNodes, LoadBalancers loadBalancers) { + return NodeAcl.from(this, allNodes, loadBalancers); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 8e14b61db9a..2642983dd2a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -20,10 +20,7 @@ import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.applications.Applications; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancerList; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancers; import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions; import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer; import com.yahoo.vespa.hosted.provision.maintenance.PeriodicApplicationMaintainer; @@ -50,17 +47,13 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; -import java.util.Comparator; import java.util.EnumSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.TreeSet; import java.util.function.BiFunction; -import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -111,6 +104,7 @@ public class NodeRepository extends AbstractComponent { private final ContainerImages containerImages; private final JobControl jobControl; private final Applications applications; + private final LoadBalancers loadBalancers; private final int spareCount; /** @@ -171,6 +165,7 @@ public class NodeRepository extends AbstractComponent { this.containerImages = new ContainerImages(db, containerImage); this.jobControl = new JobControl(new JobControlFlags(db, flagSource)); this.applications = new Applications(db); + this.loadBalancers = new LoadBalancers(db); this.spareCount = spareCount; rewriteNodes(); } @@ -213,15 +208,30 @@ public class NodeRepository extends AbstractComponent { /** Returns this node repo's view of the applications deployed to it */ public Applications applications() { return applications; } - public NodeFlavors flavors() { - return flavors; - } + /** Returns the load balancers available in this node repo */ + public LoadBalancers loadBalancers() { return loadBalancers; } + + public NodeFlavors flavors() { return flavors; } public HostResourcesCalculator resourcesCalculator() { return resourcesCalculator; } /** The number of nodes we should ensure has free capacity for node failures whenever possible */ public int spareCount() { return spareCount; } + /** + * Returns ACLs for the children of the given host. + * + * @param host node for which to generate ACLs + * @return the list of node ACLs + */ + public List<NodeAcl> getChildAcls(Node host) { + if ( ! host.type().isHost()) throw new IllegalArgumentException("Only hosts have children"); + NodeList allNodes = list(); + return list().childrenOf(host).asList().stream() + .map(childNode -> childNode.acl(allNodes, loadBalancers)) + .collect(Collectors.toUnmodifiableList()); + } + // ---------------- Query API ---------------------------------------------------------------- /** @@ -274,125 +284,11 @@ public class NodeRepository extends AbstractComponent { return new LockedNodeList(getNodes(), lock); } - /** Returns a filterable list of all load balancers in this repository */ - public LoadBalancerList loadBalancers() { - return loadBalancers((ignored) -> true); - } - - /** Returns a filterable list of load balancers belonging to given application */ - public LoadBalancerList loadBalancers(ApplicationId application) { - return loadBalancers((id) -> id.application().equals(application)); - } - - private LoadBalancerList loadBalancers(Predicate<LoadBalancerId> predicate) { - return LoadBalancerList.copyOf(db.readLoadBalancers(predicate).values()); - } - public List<Node> getNodes(ApplicationId id, State ... inState) { return db.readNodes(id, inState); } public List<Node> getInactive() { return db.readNodes(State.inactive); } public List<Node> getFailed() { return db.readNodes(State.failed); } /** - * Returns the ACL for the node (trusted nodes, networks and ports) - */ - private NodeAcl getNodeAcl(Node node, NodeList candidates) { - Set<Node> trustedNodes = new TreeSet<>(Comparator.comparing(Node::hostname)); - Set<Integer> trustedPorts = new LinkedHashSet<>(); - Set<String> trustedNetworks = new LinkedHashSet<>(); - - // For all cases below, trust: - // - SSH: If the Docker host has one container, and it is using the Docker host's network namespace, - // opening up SSH to the Docker host is done here as a trusted port. For simplicity all nodes have - // SSH opened (which is safe for 2 reasons: SSH daemon is not run inside containers, and NPT networks - // will (should) not forward port 22 traffic to container). - // - parent host (for health checks and metrics) - // - nodes in same application - // - load balancers allocated to application - trustedPorts.add(22); - candidates.parentOf(node).ifPresent(trustedNodes::add); - node.allocation().ifPresent(allocation -> { - trustedNodes.addAll(candidates.owner(allocation.owner()).asList()); - loadBalancers(allocation.owner()).asList().stream() - .map(LoadBalancer::instance) - .map(LoadBalancerInstance::networks) - .forEach(trustedNetworks::addAll); - }); - - switch (node.type()) { - case tenant: - // Tenant nodes in other states than ready, trust: - // - config servers - // - proxy nodes - // - parents of the nodes in the same application: If some of the nodes are on a different IP versions - // or only a subset of them are dual-stacked, the communication between the nodes may be NATed - // with via parent's IP address. - trustedNodes.addAll(candidates.nodeType(NodeType.config).asList()); - trustedNodes.addAll(candidates.nodeType(NodeType.proxy).asList()); - node.allocation().ifPresent(allocation -> - trustedNodes.addAll(candidates.parentsOf(candidates.owner(allocation.owner())).asList())); - - if (node.state() == State.ready) { - // Tenant nodes in state ready, trust: - // - All tenant nodes in zone. When a ready node is allocated to a an application there's a brief - // window where current ACLs have not yet been applied on the node. To avoid service disruption - // during this window, ready tenant nodes trust all other tenant nodes. - trustedNodes.addAll(candidates.nodeType(NodeType.tenant).asList()); - } - break; - - case config: - // Config servers trust: - // - all nodes - // - port 4443 from the world - trustedNodes.addAll(candidates.asList()); - trustedPorts.add(4443); - break; - - case proxy: - // Proxy nodes trust: - // - config servers - // - all connections from the world on 4080 (insecure tb removed), and 4443 - trustedNodes.addAll(candidates.nodeType(NodeType.config).asList()); - trustedPorts.add(443); - trustedPorts.add(4080); - trustedPorts.add(4443); - break; - - case controller: - // Controllers: - // - port 4443 (HTTPS + Athenz) from the world - // - port 443 (HTTPS + Okta) from the world - // - port 80 (HTTP) from the world - for redirect to HTTPS/443 only - trustedPorts.add(4443); - trustedPorts.add(443); - trustedPorts.add(80); - break; - - default: - illegal("Don't know how to create ACL for " + node + " of type " + node.type()); - } - - return new NodeAcl(node, trustedNodes, trustedNetworks, trustedPorts); - } - - /** - * Creates a list of node ACLs which identify which nodes the given node should trust - * - * @param node Node for which to generate ACLs - * @param children Return ACLs for the children of the given node (e.g. containers on a Docker host) - * @return List of node ACLs - */ - public List<NodeAcl> getNodeAcls(Node node, boolean children) { - NodeList candidates = list(); - if (children) { - return candidates.childrenOf(node).asList().stream() - .map(childNode -> getNodeAcl(childNode, candidates)) - .collect(Collectors.toUnmodifiableList()); - } - return List.of(getNodeAcl(node, candidates)); - } - - /** * Returns whether the zone managed by this node repository seems to be working. * If too many nodes are not responding, there is probably some zone-wide issue * and we should probably refrain from making changes to it. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java new file mode 100644 index 00000000000..9a2652e884c --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java @@ -0,0 +1,48 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.lb; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.NodeAcl; +import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; + +import java.util.Comparator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * The load balancers of this node repo. + * + * @author bratseth + * @author mpolden + */ +public class LoadBalancers { + + private final CuratorDatabaseClient db; + + public LoadBalancers(CuratorDatabaseClient db) { + this.db = db; + } + + /** Returns a filterable list of all load balancers in this repository */ + public LoadBalancerList list() { + return list((ignored) -> true); + } + + /** Returns a filterable list of load balancers belonging to given application */ + public LoadBalancerList list(ApplicationId application) { + return list((id) -> id.application().equals(application)); + } + + private LoadBalancerList list(Predicate<LoadBalancerId> predicate) { + return LoadBalancerList.copyOf(db.readLoadBalancers(predicate).values()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java index b24fba83b12..4116d58f2d1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java @@ -2,10 +2,18 @@ package com.yahoo.vespa.hosted.provision.node; import com.google.common.collect.ImmutableSet; +import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancers; +import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.Objects; import java.util.Set; +import java.util.TreeSet; /** * A node ACL. The ACL contains the node which the ACL is valid for, @@ -20,7 +28,7 @@ public class NodeAcl { private final Set<String> trustedNetworks; private final Set<Integer> trustedPorts; - public NodeAcl(Node node, Set<Node> trustedNodes, Set<String> trustedNetworks, Set<Integer> trustedPorts) { + private NodeAcl(Node node, Set<Node> trustedNodes, Set<String> trustedNetworks, Set<Integer> trustedPorts) { this.node = Objects.requireNonNull(node, "node must be non-null"); this.trustedNodes = ImmutableSet.copyOf(Objects.requireNonNull(trustedNodes, "trustedNodes must be non-null")); this.trustedNetworks = ImmutableSet.copyOf(Objects.requireNonNull(trustedNetworks, "trustedNetworks must be non-null")); @@ -43,4 +51,83 @@ public class NodeAcl { return trustedPorts; } + public static NodeAcl from(Node node, NodeList allNodes, LoadBalancers loadBalancers) { + Set<Node> trustedNodes = new TreeSet<>(Comparator.comparing(Node::hostname)); + Set<Integer> trustedPorts = new LinkedHashSet<>(); + Set<String> trustedNetworks = new LinkedHashSet<>(); + + // For all cases below, trust: + // - SSH: If the Docker host has one container, and it is using the Docker host's network namespace, + // opening up SSH to the Docker host is done here as a trusted port. For simplicity all nodes have + // SSH opened (which is safe for 2 reasons: SSH daemon is not run inside containers, and NPT networks + // will (should) not forward port 22 traffic to container). + // - parent host (for health checks and metrics) + // - nodes in same application + // - load balancers allocated to application + trustedPorts.add(22); + allNodes.parentOf(node).ifPresent(trustedNodes::add); + node.allocation().ifPresent(allocation -> { + trustedNodes.addAll(allNodes.owner(allocation.owner()).asList()); + loadBalancers.list(allocation.owner()).asList() + .stream() + .map(LoadBalancer::instance) + .map(LoadBalancerInstance::networks) + .forEach(trustedNetworks::addAll); + }); + + switch (node.type()) { + case tenant: + // Tenant nodes in other states than ready, trust: + // - config servers + // - proxy nodes + // - parents of the nodes in the same application: If some of the nodes are on a different IP versions + // or only a subset of them are dual-stacked, the communication between the nodes may be NATed + // with via parent's IP address. + trustedNodes.addAll(allNodes.nodeType(NodeType.config).asList()); + trustedNodes.addAll(allNodes.nodeType(NodeType.proxy).asList()); + node.allocation().ifPresent(allocation -> + trustedNodes.addAll(allNodes.parentsOf(allNodes.owner(allocation.owner())).asList())); + + if (node.state() == Node.State.ready) { + // Tenant nodes in state ready, trust: + // - All tenant nodes in zone. When a ready node is allocated to a an application there's a brief + // window where current ACLs have not yet been applied on the node. To avoid service disruption + // during this window, ready tenant nodes trust all other tenant nodes. + trustedNodes.addAll(allNodes.nodeType(NodeType.tenant).asList()); + } + break; + + case config: + // Config servers trust: + // - all nodes + // - port 4443 from the world + trustedNodes.addAll(allNodes.asList()); + trustedPorts.add(4443); + break; + + case proxy: + // Proxy nodes trust: + // - config servers + // - all connections from the world on 4080 (insecure tb removed), and 4443 + trustedNodes.addAll(allNodes.nodeType(NodeType.config).asList()); + trustedPorts.add(443); + trustedPorts.add(4080); + trustedPorts.add(4443); + break; + + case controller: + // Controllers: + // - port 4443 (HTTPS + Athenz) from the world + // - port 443 (HTTPS + Okta) from the world + trustedPorts.add(4443); + trustedPorts.add(443); + break; + + default: + throw new IllegalArgumentException("Don't know how to create ACL for " + node + + " of type " + node.type()); + } + return new NodeAcl(node, trustedNodes, trustedNetworks, trustedPorts); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index a048f8bb8d2..cfcaa1f83c0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -115,12 +115,12 @@ public class LoadBalancerProvisioner { * load balancer(s). */ public void deactivate(ApplicationTransaction transaction) { - deactivate(nodeRepository.loadBalancers(transaction.application()).asList(), transaction.nested()); + deactivate(nodeRepository.loadBalancers().list(transaction.application()).asList(), transaction.nested()); } /** Returns load balancers of given application that are no longer referenced by given clusters */ private List<LoadBalancer> surplusLoadBalancersOf(ApplicationId application, Set<ClusterSpec.Id> activeClusters) { - var activeLoadBalancersByCluster = nodeRepository.loadBalancers(application) + var activeLoadBalancersByCluster = nodeRepository.loadBalancers().list(application) .in(LoadBalancer.State.active) .asList() .stream() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LoadBalancersResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LoadBalancersResponse.java index 092426c75d4..1ef449555d9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LoadBalancersResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/LoadBalancersResponse.java @@ -40,9 +40,9 @@ public class LoadBalancersResponse extends HttpResponse { LoadBalancerList loadBalancers; var application = application(); if (application.isPresent()) { - loadBalancers = nodeRepository.loadBalancers(application.get()); + loadBalancers = nodeRepository.loadBalancers().list(application.get()); } else { - loadBalancers = nodeRepository.loadBalancers(); + loadBalancers = nodeRepository.loadBalancers().list(); } return loadBalancers.asList(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java index 07e93111b6f..2d2feccc114 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java @@ -31,7 +31,7 @@ public class NodeAclResponse extends HttpResponse { super(200); this.nodeRepository = nodeRepository; this.slime = new Slime(); - this.aclsForChildren = request.getBooleanProperty(CHILDREN_REQUEST_PROPERTY); + this.aclsForChildren = request.getBooleanProperty(CHILDREN_REQUEST_PROPERTY); // This is always true? Cursor root = slime.setObject(); String hostname = baseName(request.getUri().getPath()); @@ -42,7 +42,8 @@ public class NodeAclResponse extends HttpResponse { Node node = nodeRepository.getNode(hostname) .orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'")); - List<NodeAcl> acls = nodeRepository.getNodeAcls(node, aclsForChildren); + List<NodeAcl> acls = aclsForChildren ? nodeRepository.getChildAcls(node) : + List.of(node.acl(nodeRepository.list(), nodeRepository.loadBalancers())); Cursor trustedNodesArray = object.setArray("trustedNodes"); acls.forEach(nodeAcl -> toSlime(nodeAcl, trustedNodesArray)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java index fd54a44c3df..7ef13cc0be2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java @@ -52,12 +52,12 @@ public class AclProvisioningTest { // Get trusted nodes for the first active node Node node = activeNodes.get(0); List<Node> host = node.parentHostname().flatMap(tester.nodeRepository()::getNode).map(List::of).orElseGet(List::of); - Supplier<List<NodeAcl>> nodeAcls = () -> tester.nodeRepository().getNodeAcls(node, false); + Supplier<NodeAcl> nodeAcls = () -> node.acl(tester.nodeRepository().list(), tester.nodeRepository().loadBalancers()); // Trusted nodes are active nodes in same application, proxy nodes and config servers assertAcls(List.of(activeNodes, proxyNodes, configServers, host), Set.of("10.2.3.0/24", "10.4.5.0/24"), - nodeAcls.get()); + List.of(nodeAcls.get())); } @Test @@ -73,11 +73,11 @@ public class AclProvisioningTest { // Get trusted nodes for a ready tenant node Node node = tester.nodeRepository().getNodes(NodeType.tenant, Node.State.ready).get(0); - List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(node, false); + NodeAcl nodeAcl = node.acl(tester.nodeRepository().list(), tester.nodeRepository().loadBalancers()); List<Node> tenantNodes = tester.nodeRepository().getNodes(NodeType.tenant); // Trusted nodes are all proxy-, config-, and, tenant-nodes - assertAcls(List.of(proxyNodes, configServers, tenantNodes), nodeAcls); + assertAcls(List.of(proxyNodes, configServers, tenantNodes), List.of(nodeAcl)); } @Test @@ -95,10 +95,10 @@ public class AclProvisioningTest { // Get trusted nodes for the first config server Node node = tester.nodeRepository().getNode("cfg1") .orElseThrow(() -> new RuntimeException("Failed to find cfg1")); - List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(node, false); + NodeAcl nodeAcl = node.acl(tester.nodeRepository().list(), tester.nodeRepository().loadBalancers()); // Trusted nodes is all tenant nodes, all proxy nodes, all config servers and load balancer subnets - assertAcls(List.of(tenantNodes, proxyNodes, configServers), Set.of("10.2.3.0/24", "10.4.5.0/24"), nodeAcls); + assertAcls(List.of(tenantNodes, proxyNodes, configServers), Set.of("10.2.3.0/24", "10.4.5.0/24"), List.of(nodeAcl)); } @Test @@ -116,10 +116,10 @@ public class AclProvisioningTest { // Get trusted nodes for first proxy node List<Node> proxyNodes = tester.nodeRepository().getNodes(zoneApplication); Node node = proxyNodes.get(0); - List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(node, false); + NodeAcl nodeAcl = node.acl(tester.nodeRepository().list(), tester.nodeRepository().loadBalancers()); // Trusted nodes is all config servers and all proxy nodes - assertAcls(List.of(proxyNodes, configServers), nodeAcls); + assertAcls(List.of(proxyNodes, configServers), List.of(nodeAcl)); } @Test @@ -132,7 +132,7 @@ public class AclProvisioningTest { List<Node> dockerNodes = tester.makeReadyVirtualDockerNodes(5, new NodeResources(1, 4, 10, 1), dockerHostNodeUnderTest.hostname()); - List<NodeAcl> acls = tester.nodeRepository().getNodeAcls(dockerHostNodeUnderTest, true); + List<NodeAcl> acls = tester.nodeRepository().getChildAcls(dockerHostNodeUnderTest); // ACLs for each container on the Docker host assertFalse(dockerNodes.isEmpty()); @@ -156,9 +156,9 @@ public class AclProvisioningTest { List<Node> controllers = tester.deploy(controllerApplication, Capacity.fromRequiredNodeType(NodeType.controller)); // Controllers and hosts all trust each other - List<NodeAcl> controllerAcls = tester.nodeRepository().getNodeAcls(controllers.get(0), false); - assertAcls(List.of(controllers), controllerAcls); - assertEquals(Set.of(22, 80, 4443, 443), controllerAcls.get(0).trustedPorts()); + NodeAcl controllerAcl = controllers.get(0).acl(tester.nodeRepository().list(), tester.nodeRepository().loadBalancers()); + assertAcls(List.of(controllers), List.of(controllerAcl)); + assertEquals(Set.of(22, 4443, 443), controllerAcl.trustedPorts()); } @Test @@ -177,14 +177,14 @@ public class AclProvisioningTest { assertEquals(2, activeNodes.size()); // Load balancer is allocated to application - var loadBalancers = tester.nodeRepository().loadBalancers(application); + var loadBalancers = tester.nodeRepository().loadBalancers().list(application); assertEquals(1, loadBalancers.asList().size()); var lbNetworks = loadBalancers.asList().get(0).instance().networks(); assertEquals(2, lbNetworks.size()); // ACL for nodes with allocation trust their respective load balancer networks, if any for (var host : hosts) { - var acls = tester.nodeRepository().getNodeAcls(host, true); + var acls = tester.nodeRepository().getChildAcls(host); assertEquals(2, acls.size()); assertEquals(Set.of(), acls.get(0).trustedNetworks()); assertEquals(application, acls.get(1).node().allocation().get().owner()); @@ -197,10 +197,10 @@ public class AclProvisioningTest { tester.makeConfigServers(3, "default", Version.fromString("6.123.456")); List<Node> readyNodes = tester.makeReadyNodes(1, "default", NodeType.proxy); - List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(readyNodes.get(0), false); + NodeAcl nodeAcl = readyNodes.get(0).acl(tester.nodeRepository().list(), tester.nodeRepository().loadBalancers()); - assertEquals(3, nodeAcls.get(0).trustedNodes().size()); - Iterator<Node> trustedNodes = nodeAcls.get(0).trustedNodes().iterator(); + assertEquals(3, nodeAcl.trustedNodes().size()); + Iterator<Node> trustedNodes = nodeAcl.trustedNodes().iterator(); assertEquals(Set.of("127.0.1.1"), trustedNodes.next().ipConfig().primary()); assertEquals(Set.of("127.0.1.2"), trustedNodes.next().ipConfig().primary()); assertEquals(Set.of("127.0.1.3"), trustedNodes.next().ipConfig().primary()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java index 105f2122e0c..ec08ed15ab4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java @@ -49,8 +49,8 @@ public class LoadBalancerProvisionerTest { @Test public void provision_load_balancer() { - Supplier<List<LoadBalancer>> lbApp1 = () -> tester.nodeRepository().loadBalancers(app1).asList(); - Supplier<List<LoadBalancer>> lbApp2 = () -> tester.nodeRepository().loadBalancers(app2).asList(); + Supplier<List<LoadBalancer>> lbApp1 = () -> tester.nodeRepository().loadBalancers().list(app1).asList(); + Supplier<List<LoadBalancer>> lbApp2 = () -> tester.nodeRepository().loadBalancers().list(app2).asList(); ClusterSpec.Id containerCluster1 = ClusterSpec.Id.from("qrs1"); ClusterSpec.Id contentCluster = ClusterSpec.Id.from("content"); @@ -82,7 +82,7 @@ public class LoadBalancerProvisionerTest { tester.activate(app1, prepare(app1, clusterRequest(ClusterSpec.Type.container, containerCluster1), clusterRequest(ClusterSpec.Type.content, contentCluster))); - LoadBalancer loadBalancer = tester.nodeRepository().loadBalancers(app1).asList().get(0); + LoadBalancer loadBalancer = tester.nodeRepository().loadBalancers().list(app1).asList().get(0); assertEquals(2, loadBalancer.instance().reals().size()); assertTrue("Failed node is removed", loadBalancer.instance().reals().stream() .map(Real::hostname) @@ -159,7 +159,7 @@ public class LoadBalancerProvisionerTest { tester.makeReadyHosts(2, resources); tester.activateTenantHosts(); var nodes = tester.prepare(app1, clusterRequest(ClusterSpec.Type.container, ClusterSpec.Id.from("qrs")), 2 , 1, resources); - Supplier<LoadBalancer> lb = () -> tester.nodeRepository().loadBalancers(app1).asList().get(0); + Supplier<LoadBalancer> lb = () -> tester.nodeRepository().loadBalancers().list(app1).asList().get(0); assertTrue("Load balancer provisioned with empty reals", tester.loadBalancerService().instances().get(lb.get().id()).reals().isEmpty()); assignIps(tester.nodeRepository().getNodes(app1)); tester.activate(app1, nodes); @@ -185,7 +185,7 @@ public class LoadBalancerProvisionerTest { clusterRequest(ClusterSpec.Type.container, ClusterSpec.Id.from("tenant-host")))); assertTrue("No load balancer provisioned", tester.loadBalancerService().instances().isEmpty()); - assertEquals(List.of(), tester.nodeRepository().loadBalancers(infraApp1).asList()); + assertEquals(List.of(), tester.nodeRepository().loadBalancers().list(infraApp1).asList()); } @Test @@ -193,12 +193,12 @@ public class LoadBalancerProvisionerTest { tester.activate(app1, prepare(app1, clusterRequest(ClusterSpec.Type.content, ClusterSpec.Id.from("tenant-host")))); assertTrue("No load balancer provisioned", tester.loadBalancerService().instances().isEmpty()); - assertEquals(List.of(), tester.nodeRepository().loadBalancers(app1).asList()); + assertEquals(List.of(), tester.nodeRepository().loadBalancers().list(app1).asList()); } @Test public void provision_load_balancer_combined_cluster() { - Supplier<List<LoadBalancer>> lbs = () -> tester.nodeRepository().loadBalancers(app1).asList(); + Supplier<List<LoadBalancer>> lbs = () -> tester.nodeRepository().loadBalancers().list(app1).asList(); var combinedId = ClusterSpec.Id.from("container1"); var nodes = prepare(app1, clusterRequest(ClusterSpec.Type.combined, ClusterSpec.Id.from("content1"), Optional.of(combinedId))); assertEquals(1, lbs.get().size()); @@ -211,7 +211,7 @@ public class LoadBalancerProvisionerTest { @Test public void provision_load_balancer_config_server_cluster() { ApplicationId configServerApp = ApplicationId.from("hosted-vespa", "zone-config-servers", "default"); - Supplier<List<LoadBalancer>> lbs = () -> tester.nodeRepository().loadBalancers(configServerApp).asList(); + Supplier<List<LoadBalancer>> lbs = () -> tester.nodeRepository().loadBalancers().list(configServerApp).asList(); var cluster = ClusterSpec.Id.from("zone-config-servers"); var nodes = prepare(configServerApp, Capacity.fromRequiredNodeType(NodeType.config), clusterRequest(ClusterSpec.Type.admin, cluster)); @@ -226,7 +226,7 @@ public class LoadBalancerProvisionerTest { @Test public void provision_load_balancer_controller_cluster() { ApplicationId controllerApp = ApplicationId.from("hosted-vespa", "controller", "default"); - Supplier<List<LoadBalancer>> lbs = () -> tester.nodeRepository().loadBalancers(controllerApp).asList(); + Supplier<List<LoadBalancer>> lbs = () -> tester.nodeRepository().loadBalancers().list(controllerApp).asList(); var cluster = ClusterSpec.Id.from("zone-config-servers"); var nodes = prepare(controllerApp, Capacity.fromRequiredNodeType(NodeType.controller), clusterRequest(ClusterSpec.Type.container, cluster)); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index d3e7afba25f..ff1c56f6b2f 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -56,8 +56,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { } @Override - public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) - throws HostStateChangeDeniedException { + public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant // suspension in this case they will permanently be down/removed. @@ -123,8 +122,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { // I proxy host if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) { - // All nodes have all state and we need to be able to remove the half that are retired on cluster migration - return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT; + return ConcurrentSuspensionLimitForCluster.ONE_NODE; } if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER) @@ -164,8 +162,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { } if (ServiceType.CLUSTER_CONTROLLER.equals(clusterApi.serviceType())) { - // All nodes have all state and we need to be able to remove the half that are retired on cluster migration - return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT; + return ConcurrentSuspensionLimitForCluster.ONE_NODE; } if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) { diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_jobtest.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_jobtest.cpp index 84579eb9c9e..21bd96552b8 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_jobtest.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_jobtest.cpp @@ -111,6 +111,7 @@ JobTestBase::endScan() { JobTestBase & JobTestBase::compact() { + EXPECT_FALSE(run()); EXPECT_TRUE(run()); return *this; } diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job_base.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job_base.cpp index 3987282ef34..bf93ceca6d8 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job_base.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job_base.cpp @@ -138,6 +138,7 @@ LidSpaceCompactionJobBase::run() } else { _scanItr = IDocumentScanIterator::UP(); _shouldCompactLidSpace = true; + return false; } } diff --git a/vespalib/src/apps/vespa-detect-hostname/detect_hostname.cpp b/vespalib/src/apps/vespa-detect-hostname/detect_hostname.cpp index 5c84790a669..d65c8f20e12 100644 --- a/vespalib/src/apps/vespa-detect-hostname/detect_hostname.cpp +++ b/vespalib/src/apps/vespa-detect-hostname/detect_hostname.cpp @@ -49,6 +49,10 @@ int main(int, char **) { if (check(my_hostname, my_ip_set, my_hostname_error)) { fprintf(stdout, "%s\n", my_hostname.c_str()); } else if (check(localhost, my_ip_set, localhost_error)) { + fprintf(stderr, "WARN: hostname detection failed, falling back to local hostname: %s", localhost.c_str()); + fprintf(stderr, " WARN: canonical hostname (from gethostname/getaddrinfo): %s\n", my_hostname.c_str()); + fprintf(stderr, " WARN: %s\n", my_hostname_error.c_str()); + fprintf(stdout, "%s\n", localhost.c_str()); } else { fprintf(stderr, "FATAL: hostname detection failed\n"); |