diff options
75 files changed, 773 insertions, 657 deletions
diff --git a/client/go/internal/cli/cmd/document.go b/client/go/internal/cli/cmd/document.go index c31f8c34d14..1e5d1c30f6e 100644 --- a/client/go/internal/cli/cmd/document.go +++ b/client/go/internal/cli/cmd/document.go @@ -171,7 +171,7 @@ https://docs.vespa.ai/en/reference/document-json-format.html#document-operations When this returns successfully, the document is guaranteed to be visible in any subsequent get or query operation. -To feed with high throughput, https://docs.vespa.ai/en/vespa-feed-client.html +To feed with high throughput, https://docs.vespa.ai/en/reference/vespa-cli/vespa_feed.html should be used instead of this.`, Example: `$ vespa document src/test/resources/A-Head-Full-of-Dreams.json`, DisableAutoGenTag: true, diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfile.java b/config-model/src/main/java/com/yahoo/schema/RankProfile.java index 69f32daef4a..35ef12f077a 100644 --- a/config-model/src/main/java/com/yahoo/schema/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/RankProfile.java @@ -100,6 +100,7 @@ public class RankProfile implements Cloneable { private Double termwiseLimit = null; private Double postFilterThreshold = null; private Double approximateThreshold = null; + private Double targetHitsMaxAdjustmentFactor = null; /** The drop limit used to drop hits with rank score less than or equal to this value */ private double rankScoreDropLimit = -Double.MAX_VALUE; @@ -768,6 +769,7 @@ public class RankProfile implements Cloneable { public void setTermwiseLimit(double termwiseLimit) { this.termwiseLimit = termwiseLimit; } public void setPostFilterThreshold(double threshold) { this.postFilterThreshold = threshold; } public void setApproximateThreshold(double threshold) { this.approximateThreshold = threshold; } + public void setTargetHitsMaxAdjustmentFactor(double factor) { this.targetHitsMaxAdjustmentFactor = factor; } public OptionalDouble getTermwiseLimit() { if (termwiseLimit != null) return OptionalDouble.of(termwiseLimit); @@ -789,6 +791,13 @@ public class RankProfile implements Cloneable { return uniquelyInherited(p -> p.getApproximateThreshold(), l -> l.isPresent(), "approximate-threshold").orElse(OptionalDouble.empty()); } + public OptionalDouble getTargetHitsMaxAdjustmentFactor() { + if (targetHitsMaxAdjustmentFactor != null) { + return OptionalDouble.of(targetHitsMaxAdjustmentFactor); + } + return uniquelyInherited(p -> p.getTargetHitsMaxAdjustmentFactor(), l -> l.isPresent(), "target-hits-max-adjustment-factor").orElse(OptionalDouble.empty()); + } + /** Whether we should ignore the default rank features. Set to null to use inherited */ public void setIgnoreDefaultRankFeatures(Boolean ignoreDefaultRankFeatures) { this.ignoreDefaultRankFeatures = ignoreDefaultRankFeatures; diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index 82c0c9d516a..29bd454cc62 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -153,6 +153,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private final double termwiseLimit; private final OptionalDouble postFilterThreshold; private final OptionalDouble approximateThreshold; + private final OptionalDouble targetHitsMaxAdjustmentFactor; private final double rankScoreDropLimit; private final boolean enableNestedMultivalueGrouping; @@ -197,6 +198,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { enableNestedMultivalueGrouping = deployProperties.featureFlags().enableNestedMultivalueGrouping(); postFilterThreshold = compiled.getPostFilterThreshold(); approximateThreshold = compiled.getApproximateThreshold(); + targetHitsMaxAdjustmentFactor = compiled.getTargetHitsMaxAdjustmentFactor(); keepRankCount = compiled.getKeepRankCount(); rankScoreDropLimit = compiled.getRankScoreDropLimit(); ignoreDefaultRankFeatures = compiled.getIgnoreDefaultRankFeatures(); @@ -429,6 +431,9 @@ public class RawRankProfile implements RankProfilesConfig.Producer { if (approximateThreshold.isPresent()) { properties.add(new Pair<>("vespa.matching.global_filter.lower_limit", String.valueOf(approximateThreshold.getAsDouble()))); } + if (targetHitsMaxAdjustmentFactor.isPresent()) { + properties.add(new Pair<>("vespa.matching.nns.target_hits_max_adjustment_factor", String.valueOf(targetHitsMaxAdjustmentFactor.getAsDouble()))); + } if (matchPhaseSettings != null) { properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute())); properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + "")); diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java index bdecf6332a0..c25d393c8bf 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java @@ -65,6 +65,8 @@ public class ConvertParsedRanking { (value -> profile.setPostFilterThreshold(value)); parsed.getApproximateThreshold().ifPresent (value -> profile.setApproximateThreshold(value)); + parsed.getTargetHitsMaxAdjustmentFactor().ifPresent + (value -> profile.setTargetHitsMaxAdjustmentFactor(value)); parsed.getKeepRankCount().ifPresent (value -> profile.setKeepRankCount(value)); parsed.getMinHitsPerThread().ifPresent diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java index 2809ee0c633..1d06b993cdc 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java @@ -29,6 +29,7 @@ class ParsedRankProfile extends ParsedBlock { private Double termwiseLimit = null; private Double postFilterThreshold = null; private Double approximateThreshold = null; + private Double targetHitsMaxAdjustmentFactor = null; private final List<FeatureList> matchFeatures = new ArrayList<>(); private final List<FeatureList> rankFeatures = new ArrayList<>(); private final List<FeatureList> summaryFeatures = new ArrayList<>(); @@ -65,6 +66,7 @@ class ParsedRankProfile extends ParsedBlock { Optional<Double> getTermwiseLimit() { return Optional.ofNullable(this.termwiseLimit); } Optional<Double> getPostFilterThreshold() { return Optional.ofNullable(this.postFilterThreshold); } Optional<Double> getApproximateThreshold() { return Optional.ofNullable(this.approximateThreshold); } + Optional<Double> getTargetHitsMaxAdjustmentFactor() { return Optional.ofNullable(this.targetHitsMaxAdjustmentFactor); } List<FeatureList> getMatchFeatures() { return List.copyOf(this.matchFeatures); } List<FeatureList> getRankFeatures() { return List.copyOf(this.rankFeatures); } List<FeatureList> getSummaryFeatures() { return List.copyOf(this.summaryFeatures); } @@ -231,4 +233,9 @@ class ParsedRankProfile extends ParsedBlock { this.approximateThreshold = threshold; } + void setTargetHitsMaxAdjustmentFactor(double factor) { + verifyThat(targetHitsMaxAdjustmentFactor == null, "already has target-hits-max-adjustment-factor"); + this.targetHitsMaxAdjustmentFactor = factor; + } + } diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj index b2cb258c0ab..42eeabb5ac7 100644 --- a/config-model/src/main/javacc/SchemaParser.jj +++ b/config-model/src/main/javacc/SchemaParser.jj @@ -326,6 +326,7 @@ TOKEN : | < TERMWISE_LIMIT: "termwise-limit" > | < POST_FILTER_THRESHOLD: "post-filter-threshold" > | < APPROXIMATE_THRESHOLD: "approximate-threshold" > +| < TARGET_HITS_MAX_ADJUSTMENT_FACTOR: "target-hits-max-adjustment-factor" > | < KEEP_RANK_COUNT: "keep-rank-count" > | < RANK_SCORE_DROP_LIMIT: "rank-score-drop-limit" > | < CONSTANTS: "constants" > @@ -1727,6 +1728,7 @@ void rankProfileItem(ParsedSchema schema, ParsedRankProfile profile) : { } | termwiseLimit(profile) | postFilterThreshold(profile) | approximateThreshold(profile) + | targetHitsMaxAdjustmentFactor(profile) | rankFeatures(profile) | rankProperties(profile) | secondPhase(profile) @@ -2190,6 +2192,19 @@ void approximateThreshold(ParsedRankProfile profile) : } /** + * This rule consumes a target-hits-max-adjustment-factor statement for a rank profile. + * + * @param profile the rank profile to modify + */ +void targetHitsMaxAdjustmentFactor(ParsedRankProfile profile) : +{ + double factor; +} +{ + (<TARGET_HITS_MAX_ADJUSTMENT_FACTOR> <COLON> factor = floatValue()) { profile.setTargetHitsMaxAdjustmentFactor(factor); } +} + +/** * Consumes a rank-properties block of a rank profile. There * is a little trick within this rule to allow the final rank property * to skip the terminating newline token. @@ -2641,6 +2656,7 @@ String identifierWithDash() : | <SECOND_PHASE> | <STRUCT_FIELD> | <SUMMARY_TO> + | <TARGET_HITS_MAX_ADJUSTMENT_FACTOR> | <TERMWISE_LIMIT> | <UPPER_BOUND> ) { return token.image; } diff --git a/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java b/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java index 85225f0d255..380b458ea8c 100644 --- a/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java @@ -459,17 +459,9 @@ public class RankProfileTestCase extends AbstractSchemaTestCase { } private void verifyApproximateNearestNeighborThresholdSettings(Double postFilterThreshold, Double approximateThreshold) throws ParseException { - var rankProfileRegistry = new RankProfileRegistry(); - var props = new TestProperties(); - var queryProfileRegistry = new QueryProfileRegistry(); - var builder = new ApplicationBuilder(rankProfileRegistry, queryProfileRegistry, props); - builder.addSchema(createSDWithRankProfileThresholds(postFilterThreshold, approximateThreshold)); - builder.build(true); - - var schema = builder.getSchema(); - var rankProfile = rankProfileRegistry.get(schema, "my_profile"); - var rawRankProfile = new RawRankProfile(rankProfile, new LargeRankingExpressions(new MockFileRegistry()), queryProfileRegistry, - new ImportedMlModels(), new AttributeFields(schema), props); + var rp = createRankProfile(postFilterThreshold, approximateThreshold, null); + var rankProfile = rp.getFirst(); + var rawRankProfile = rp.getSecond(); if (postFilterThreshold != null) { assertEquals((double)postFilterThreshold, rankProfile.getPostFilterThreshold().getAsDouble(), 0.000001); @@ -488,13 +480,52 @@ public class RankProfileTestCase extends AbstractSchemaTestCase { } } - private String createSDWithRankProfileThresholds(Double postFilterThreshold, Double approximateThreshold) { + @Test + void target_hits_max_adjustment_factor_is_configurable() throws ParseException { + verifyTargetHitsMaxAdjustmentFactor(null); + verifyTargetHitsMaxAdjustmentFactor(2.0); + } + + private void verifyTargetHitsMaxAdjustmentFactor(Double targetHitsMaxAdjustmentFactor) throws ParseException { + var rp = createRankProfile(null, null, targetHitsMaxAdjustmentFactor); + var rankProfile = rp.getFirst(); + var rawRankProfile = rp.getSecond(); + if (targetHitsMaxAdjustmentFactor != null) { + assertEquals((double)targetHitsMaxAdjustmentFactor, rankProfile.getTargetHitsMaxAdjustmentFactor().getAsDouble(), 0.000001); + assertEquals(String.valueOf(targetHitsMaxAdjustmentFactor), findProperty(rawRankProfile.configProperties(), "vespa.matching.nns.target_hits_max_adjustment_factor").get()); + } else { + assertTrue(rankProfile.getTargetHitsMaxAdjustmentFactor().isEmpty()); + assertFalse(findProperty(rawRankProfile.configProperties(), "vespa.matching.nns.target_hits_max_adjustment_factor").isPresent()); + } + } + + private Pair<RankProfile, RawRankProfile> createRankProfile(Double postFilterThreshold, + Double approximateThreshold, + Double targetHitsMaxAdjustmentFactor) throws ParseException { + var rankProfileRegistry = new RankProfileRegistry(); + var props = new TestProperties(); + var queryProfileRegistry = new QueryProfileRegistry(); + var builder = new ApplicationBuilder(rankProfileRegistry, queryProfileRegistry, props); + builder.addSchema(createSDWithRankProfile(postFilterThreshold, approximateThreshold, targetHitsMaxAdjustmentFactor)); + builder.build(true); + + var schema = builder.getSchema(); + var rankProfile = rankProfileRegistry.get(schema, "my_profile"); + var rawRankProfile = new RawRankProfile(rankProfile, new LargeRankingExpressions(new MockFileRegistry()), queryProfileRegistry, + new ImportedMlModels(), new AttributeFields(schema), props); + return new Pair<>(rankProfile, rawRankProfile); + } + + private String createSDWithRankProfile(Double postFilterThreshold, + Double approximateThreshold, + Double targetHitsMaxAdjustmentFactor) { return joinLines( "search test {", " document test {}", " rank-profile my_profile {", - (postFilterThreshold != null ? (" post-filter-threshold: " + postFilterThreshold) : ""), - (approximateThreshold != null ? (" approximate-threshold: " + approximateThreshold) : ""), + (postFilterThreshold != null ? (" post-filter-threshold: " + postFilterThreshold) : ""), + (approximateThreshold != null ? (" approximate-threshold: " + approximateThreshold) : ""), + (targetHitsMaxAdjustmentFactor != null ? (" target-hits-max-adjustment-factor: " + targetHitsMaxAdjustmentFactor) : ""), " }", "}"); } diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 0f440957dfd..cdb660f294a 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -6981,12 +6981,14 @@ "public java.lang.Integer getMinHitsPerThread()", "public java.lang.Double getPostFilterThreshold()", "public java.lang.Double getApproximateThreshold()", + "public java.lang.Double getTargetHitsMaxAdjustmentFactor()", "public void setTermwiselimit(double)", "public void setNumThreadsPerSearch(int)", "public void setNumSearchPartitions(int)", "public void setMinHitsPerThread(int)", "public void setPostFilterThreshold(double)", "public void setApproximateThreshold(double)", + "public void setTargetHitsMaxAdjustmentFactor(double)", "public void prepare(com.yahoo.search.query.ranking.RankProperties)", "public com.yahoo.search.query.ranking.Matching clone()", "public boolean equals(java.lang.Object)", @@ -7000,6 +7002,7 @@ "public static final java.lang.String MINHITSPERTHREAD", "public static final java.lang.String POST_FILTER_THRESHOLD", "public static final java.lang.String APPROXIMATE_THRESHOLD", + "public static final java.lang.String TARGET_HITS_MAX_ADJUSTMENT_FACTOR", "public java.lang.Double termwiseLimit" ] }, diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java index 625a8bcb6da..c86c21d677f 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java @@ -1,20 +1,17 @@ package com.yahoo.search.dispatch; import com.yahoo.component.ComponentId; +import com.yahoo.component.annotation.Inject; import com.yahoo.config.subscription.ConfigSubscriber; +import com.yahoo.container.QrConfig; import com.yahoo.container.handler.VipStatus; -import com.yahoo.messagebus.network.rpc.SlobrokConfigSubscriber; import com.yahoo.vespa.config.search.DispatchConfig; import com.yahoo.vespa.config.search.DispatchNodesConfig; import com.yahoo.yolean.UncheckedInterruptedException; -import java.util.Objects; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import static java.util.Objects.requireNonNull; - /** * @author jonmv */ @@ -22,10 +19,20 @@ public class ReconfigurableDispatcher extends Dispatcher { private final ConfigSubscriber subscriber; - public ReconfigurableDispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, VipStatus vipStatus) { + @Inject + public ReconfigurableDispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, QrConfig qrConfig, VipStatus vipStatus) { super(clusterId, dispatchConfig, new DispatchNodesConfig.Builder().build(), vipStatus); this.subscriber = new ConfigSubscriber(); - this.subscriber.subscribe(this::updateWithNewConfig, DispatchNodesConfig.class, clusterId.stringValue()); + CountDownLatch configured = new CountDownLatch(1); + this.subscriber.subscribe(config -> { updateWithNewConfig(config); configured.countDown(); }, + DispatchNodesConfig.class, configId(clusterId, qrConfig)); + try { + if ( ! configured.await(1, TimeUnit.MINUTES)) + throw new IllegalStateException("timed out waiting for initial dispatch nodes config for " + clusterId.getName()); + } + catch (InterruptedException e) { + throw new UncheckedInterruptedException("interrupted waiting for initial dispatch nodes config for " + clusterId.getName(), e); + } } @Override @@ -34,4 +41,8 @@ public class ReconfigurableDispatcher extends Dispatcher { super.deconstruct(); } + private static String configId(ComponentId clusterId, QrConfig qrConfig) { + return qrConfig.clustername() + "/component/" + clusterId.getName(); + } + } diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java index 800b3a1ba89..99d6959441a 100644 --- a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java +++ b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java @@ -91,6 +91,7 @@ public class QueryProperties extends Properties { addDualCasedRM(map, Matching.MINHITSPERTHREAD, GetterSetter.of(query -> query.getRanking().getMatching().getMinHitsPerThread(), (query, value) -> query.getRanking().getMatching().setMinHitsPerThread(asInteger(value, 0)))); addDualCasedRM(map, Matching.POST_FILTER_THRESHOLD, GetterSetter.of(query -> query.getRanking().getMatching().getPostFilterThreshold(), (query, value) -> query.getRanking().getMatching().setPostFilterThreshold(asDouble(value, 1.0)))); addDualCasedRM(map, Matching.APPROXIMATE_THRESHOLD, GetterSetter.of(query -> query.getRanking().getMatching().getApproximateThreshold(), (query, value) -> query.getRanking().getMatching().setApproximateThreshold(asDouble(value, 0.05)))); + addDualCasedRM(map, Matching.TARGET_HITS_MAX_ADJUSTMENT_FACTOR, GetterSetter.of(query -> query.getRanking().getMatching().getTargetHitsMaxAdjustmentFactor(), (query, value) -> query.getRanking().getMatching().setTargetHitsMaxAdjustmentFactor(asDouble(value, 20.0)))); map.put(CompoundName.fromComponents(Ranking.RANKING, Ranking.MATCH_PHASE, MatchPhase.ATTRIBUTE), GetterSetter.of(query -> query.getRanking().getMatchPhase().getAttribute(), (query, value) -> query.getRanking().getMatchPhase().setAttribute(asString(value, null)))); map.put(CompoundName.fromComponents(Ranking.RANKING, Ranking.MATCH_PHASE, MatchPhase.ASCENDING), GetterSetter.of(query -> query.getRanking().getMatchPhase().getAscending(), (query, value) -> query.getRanking().getMatchPhase().setAscending(asBoolean(value, false)))); diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/Matching.java b/container-search/src/main/java/com/yahoo/search/query/ranking/Matching.java index 35fbd52f967..4d21f32d16d 100644 --- a/container-search/src/main/java/com/yahoo/search/query/ranking/Matching.java +++ b/container-search/src/main/java/com/yahoo/search/query/ranking/Matching.java @@ -24,6 +24,7 @@ public class Matching implements Cloneable { public static final String MINHITSPERTHREAD = "minHitsPerThread"; public static final String POST_FILTER_THRESHOLD = "postFilterThreshold"; public static final String APPROXIMATE_THRESHOLD = "approximateThreshold"; + public static final String TARGET_HITS_MAX_ADJUSTMENT_FACTOR = "targetHitsMaxAdjustmentFactor"; static { argumentType =new QueryProfileType(Ranking.MATCHING); @@ -35,6 +36,7 @@ public class Matching implements Cloneable { argumentType.addField(new FieldDescription(MINHITSPERTHREAD, "integer")); argumentType.addField(new FieldDescription(POST_FILTER_THRESHOLD, "double")); argumentType.addField(new FieldDescription(APPROXIMATE_THRESHOLD, "double")); + argumentType.addField(new FieldDescription(TARGET_HITS_MAX_ADJUSTMENT_FACTOR, "double")); argumentType.freeze(); } @@ -46,6 +48,7 @@ public class Matching implements Cloneable { private Integer minHitsPerThread = null; private Double postFilterThreshold = null; private Double approximateThreshold = null; + private Double targetHitsMaxAdjustmentFactor = null; public Double getTermwiseLimit() { return termwiseLimit; } public Integer getNumThreadsPerSearch() { return numThreadsPerSearch; } @@ -53,6 +56,7 @@ public class Matching implements Cloneable { public Integer getMinHitsPerThread() { return minHitsPerThread; } public Double getPostFilterThreshold() { return postFilterThreshold; } public Double getApproximateThreshold() { return approximateThreshold; } + public Double getTargetHitsMaxAdjustmentFactor() { return targetHitsMaxAdjustmentFactor; } public void setTermwiselimit(double value) { if ((value < 0.0) || (value > 1.0)) { @@ -75,6 +79,9 @@ public class Matching implements Cloneable { public void setApproximateThreshold(double threshold) { approximateThreshold = threshold; } + public void setTargetHitsMaxAdjustmentFactor(double factor) { + targetHitsMaxAdjustmentFactor = factor; + } /** Internal operation - DO NOT USE */ public void prepare(RankProperties rankProperties) { @@ -97,6 +104,9 @@ public class Matching implements Cloneable { if (approximateThreshold != null) { rankProperties.put("vespa.matching.global_filter.lower_limit", String.valueOf(approximateThreshold)); } + if (targetHitsMaxAdjustmentFactor != null) { + rankProperties.put("vespa.matching.nns.target_hits_max_adjustment_factor", String.valueOf(targetHitsMaxAdjustmentFactor)); + } } @Override @@ -119,12 +129,14 @@ public class Matching implements Cloneable { Objects.equals(numSearchPartitions, matching.numSearchPartitions) && Objects.equals(minHitsPerThread, matching.minHitsPerThread) && Objects.equals(postFilterThreshold, matching.postFilterThreshold) && - Objects.equals(approximateThreshold, matching.approximateThreshold); + Objects.equals(approximateThreshold, matching.approximateThreshold) && + Objects.equals(targetHitsMaxAdjustmentFactor, matching.targetHitsMaxAdjustmentFactor); } @Override public int hashCode() { - return Objects.hash(termwiseLimit, numThreadsPerSearch, numSearchPartitions, minHitsPerThread, postFilterThreshold, approximateThreshold); + return Objects.hash(termwiseLimit, numThreadsPerSearch, numSearchPartitions, minHitsPerThread, + postFilterThreshold, approximateThreshold, targetHitsMaxAdjustmentFactor); } } diff --git a/container-search/src/test/java/com/yahoo/search/query/MatchingTestCase.java b/container-search/src/test/java/com/yahoo/search/query/MatchingTestCase.java index e3a1eb18a33..37d0e9e1072 100644 --- a/container-search/src/test/java/com/yahoo/search/query/MatchingTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/query/MatchingTestCase.java @@ -20,6 +20,7 @@ public class MatchingTestCase { assertNull(query.getRanking().getMatching().getMinHitsPerThread()); assertNull(query.getRanking().getMatching().getPostFilterThreshold()); assertNull(query.getRanking().getMatching().getApproximateThreshold()); + assertNull(query.getRanking().getMatching().getTargetHitsMaxAdjustmentFactor()); } @Test @@ -30,13 +31,15 @@ public class MatchingTestCase { "&ranking.matching.numSearchPartitions=13" + "&ranking.matching.minHitsPerThread=3" + "&ranking.matching.postFilterThreshold=0.8" + - "&ranking.matching.approximateThreshold=0.3"); + "&ranking.matching.approximateThreshold=0.3" + + "&ranking.matching.targetHitsMaxAdjustmentFactor=2.5"); assertEquals(Double.valueOf(0.7), query.getRanking().getMatching().getTermwiseLimit()); assertEquals(Integer.valueOf(17), query.getRanking().getMatching().getNumThreadsPerSearch()); assertEquals(Integer.valueOf(13), query.getRanking().getMatching().getNumSearchPartitions()); assertEquals(Integer.valueOf(3), query.getRanking().getMatching().getMinHitsPerThread()); assertEquals(Double.valueOf(0.8), query.getRanking().getMatching().getPostFilterThreshold()); assertEquals(Double.valueOf(0.3), query.getRanking().getMatching().getApproximateThreshold()); + assertEquals(Double.valueOf(2.5), query.getRanking().getMatching().getTargetHitsMaxAdjustmentFactor()); query.prepare(); assertEquals("0.7", query.getRanking().getProperties().get("vespa.matching.termwise_limit").get(0)); @@ -45,6 +48,7 @@ public class MatchingTestCase { assertEquals("3", query.getRanking().getProperties().get("vespa.matching.minhitsperthread").get(0)); assertEquals("0.8", query.getRanking().getProperties().get("vespa.matching.global_filter.upper_limit").get(0)); assertEquals("0.3", query.getRanking().getProperties().get("vespa.matching.global_filter.lower_limit").get(0)); + assertEquals("2.5", query.getRanking().getProperties().get("vespa.matching.nns.target_hits_max_adjustment_factor").get(0)); } @Test diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index ba62d81afc1..b50e71154eb 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -385,7 +385,7 @@ public class PermanentFlags { "Takes effect immediately"); public static final UnboundBooleanFlag DROP_CACHES = defineFeatureFlag( - "drop-caches", false, + "drop-caches", true, "Drop caches on tenant hosts", "Takes effect on next tick", // The application ID is the exclusive application ID associated with the host, diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ToEpochSecondExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ToEpochSecondExpression.java new file mode 100644 index 00000000000..c8106148630 --- /dev/null +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ToEpochSecondExpression.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.indexinglanguage.expressions; + +import com.yahoo.document.DataType; +import com.yahoo.document.datatypes.LongFieldValue; +import java.time.Instant; + +/** + * Converts ISO-8601 formatted date string to UNIX Epoch Time in seconds + * + * @author bergum + */ + +public class ToEpochSecondExpression extends Expression { + public ToEpochSecondExpression() { + super(DataType.STRING); //only accept string input + } + + @Override + protected void doExecute(ExecutionContext context) { + String inputString = String.valueOf(context.getValue()); + long epochTime = Instant.parse(inputString).getEpochSecond(); + context.setValue(new LongFieldValue(epochTime)); + } + + @Override + protected void doVerify(VerificationContext context) { + context.setValueType(createdOutputType()); + } + + @Override + public DataType createdOutputType() { + return DataType.LONG; + } + + @Override + public String toString() { + return "to_epoch_second"; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof ToEpochSecondExpression; + } + + @Override + public int hashCode() { + return getClass().hashCode(); + } + +} diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index a039ad137ee..d559d9b7260 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -198,6 +198,7 @@ TOKEN : <TO_INT: "to_int"> | <TO_LONG: "to_long"> | <TO_POS: "to_pos"> | + <TO_EPOCH_SECOND: "to_epoch_second"> | <TO_STRING: "to_string"> | <TO_WSET: "to_wset"> | <TO_BOOL: "to_bool"> | @@ -338,6 +339,7 @@ Expression value() : val = toIntExp() | val = toLongExp() | val = toPosExp() | + val = toEpochSecondExp() | val = toStringExp() | val = toWsetExp() | val = toBoolExp() | @@ -713,6 +715,12 @@ Expression toPosExp() : { } { return new ToPositionExpression(); } } +Expression toEpochSecondExp() : { } +{ + ( <TO_EPOCH_SECOND> ) + { return new ToEpochSecondExpression(); } +} + Expression toStringExp() : { } { ( <TO_STRING> ) diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ToEpochSecondExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ToEpochSecondExpressionTestCase.java new file mode 100644 index 00000000000..7203afcc1a0 --- /dev/null +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/ToEpochSecondExpressionTestCase.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.indexinglanguage.expressions; + +import com.yahoo.document.DataType; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.vespa.indexinglanguage.SimpleTestAdapter; +import org.junit.Test; + +import static com.yahoo.vespa.indexinglanguage.expressions.ExpressionAssert.assertVerify; +import static com.yahoo.vespa.indexinglanguage.expressions.ExpressionAssert.assertVerifyThrows; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + +public class ToEpochSecondExpressionTestCase { + @Test + public void requireThatHashCodeAndEqualsAreImplemented() { + Expression exp = new ToEpochSecondExpression(); + assertFalse(exp.equals(new Object())); + assertEquals(exp, new ToEpochSecondExpression()); + assertEquals(exp.hashCode(), new ToEpochSecondExpression().hashCode()); + } + + @Test + public void requireThatExpressionCanBeVerified() { + Expression exp = new ToEpochSecondExpression(); + assertVerify(DataType.STRING, exp, DataType.LONG); + assertVerifyThrows(DataType.INT, exp, "Expected string input, got int."); + assertVerifyThrows(null, exp, "Expected string input, got null."); + } + + @Test + public void requireThatValueIsConvertedWithMs() { + ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter()); + ctx.setValue(new StringFieldValue("2023-12-24T17:00:43.000Z")).execute(new ToEpochSecondExpression()); + FieldValue val = ctx.getValue(); + assertTrue(val instanceof LongFieldValue); + assertEquals(1703437243L, ((LongFieldValue)val).getLong()); + } + + @Test + public void requireThatValueIsConverted() { + ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter()); + ctx.setValue(new StringFieldValue("2023-12-24T17:00:43Z")).execute(new ToEpochSecondExpression()); + FieldValue val = ctx.getValue(); + assertTrue(val instanceof LongFieldValue); + assertEquals(1703437243L, ((LongFieldValue)val).getLong()); + } +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 40b0bd8d88b..8976dd9ff08 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -272,16 +272,15 @@ public class ClusterModel { private Load adjustQueryDependentIdealLoadByBcpGroupInfo(Load ideal) { double currentClusterTotalVcpuPerGroup = nodes.not().retired().first().get().resources().vcpu() * groupSize(); - double targetQueryRateToHandle = ( canRescaleWithinBcpDeadline() ? averageQueryRate().orElse(0) : cluster.bcpGroupInfo().queryRate() ) * cluster.bcpGroupInfo().growthRateHeadroom() * trafficShiftHeadroom(); - double neededTotalVcpPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() + + double neededTotalVcpuPerGroup = cluster.bcpGroupInfo().cpuCostPerQuery() * targetQueryRateToHandle / groupCount() + ( 1 - cpu.queryFraction()) * cpu.idealLoad() * (clusterSpec.type().isContainer() ? 1 : groupSize()); - - double cpuAdjustment = neededTotalVcpPerGroup / currentClusterTotalVcpuPerGroup; - return ideal.withCpu(peakLoad().cpu() / cpuAdjustment); + // Max 1: Only use bcp group info if it indicates that we need to scale *up* + double cpuAdjustment = Math.max(1.0, neededTotalVcpuPerGroup / currentClusterTotalVcpuPerGroup); + return ideal.withCpu(ideal.cpu() / cpuAdjustment); } private boolean hasScaledIn(Duration period) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 379dbb27d87..be7bc3c44a8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -32,7 +32,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.0, 7.4, 29.0, + 8, 1, 3.4, 7.4, 29.0, fixture.autoscale()); // Higher query rate @@ -40,7 +40,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 8.0, 7.4, 29.0, + 8, 1, 6.8, 7.4, 29.0, fixture.autoscale()); // Higher headroom @@ -48,7 +48,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.8, 7.4, 29.0, + 8, 1, 4.0, 7.4, 29.0, fixture.autoscale()); // Higher per query cost @@ -56,7 +56,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.0, 7.4, 29.0, + 8, 1, 5.1, 7.4, 29.0, fixture.autoscale()); // Bcp elsewhere is 0 - use local only @@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 10.5, 43.2, 190.0, + 3, 3, 11.7, 43.2, 190.0, fixture.autoscale()); // Higher query rate @@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 20.9, 43.2, 190.0, + 3, 3, 23.1, 43.2, 190.0, fixture.autoscale()); // Higher headroom @@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 12.4, 43.2, 190.0, + 3, 3, 13.8, 43.2, 190.0, fixture.autoscale()); // Higher per query cost @@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 15.7, 43.2, 190.0, + 3, 3, 17.4, 43.2, 190.0, fixture.autoscale()); } @@ -127,7 +127,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.0, 16.0, 40.8, + 4, 1, 8.0, 16.0, 40.8, fixture.autoscale()); // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share @@ -135,7 +135,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 8.0, 16.0, 40.8, + 7, 1, 8.0, 16.0, 40.8, fixture.autoscale()); // Higher headroom @@ -143,7 +143,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 5, 1, 8.0, 16.0, 40.8, + 8, 1, 4.0, 16.0, 40.8, fixture.autoscale()); // Higher per query cost @@ -151,7 +151,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 6, 1, 8.0, 16.0, 40.8, + 10, 1, 4.0, 16.0, 40.8, fixture.autoscale()); } @@ -173,7 +173,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("No need for traffic shift headroom", - 2, 1, 2.0, 16.0, 40.8, + 3, 1, 4.0, 16.0, 40.8, fixture.autoscale()); } @@ -186,7 +186,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 14.2, 7.4, 29.0, + 8, 1, 11.9, 7.4, 29.0, fixture.autoscale()); // Some local traffic @@ -196,7 +196,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration1.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.9, 7.4, 29.0, + 8, 1, 6.8, 7.4, 29.0, fixture.autoscale()); // Enough local traffic to get half the votes @@ -206,7 +206,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration2.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 2.9, 7.4, 29.0, + 8, 1, 3.0, 7.4, 29.0, fixture.autoscale()); // Mostly local @@ -270,6 +270,21 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.autoscale()); } + @Test + public void test_autoscaling_containers_with_some_local_traffic() { + var fixture = DynamicProvisioningTester.fixture().clusterType(ClusterSpec.Type.container).awsProdSetup(true).build(); + + // Some local traffic + fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.store(new BcpGroupInfo(200, 1.9, 0.01)); + Duration duration1 = fixture.loader().addCpuMeasurements(0.58f, 10); + fixture.tester().clock().advance(duration1.negated()); + fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); + fixture.tester().assertResources("Not scaling down due to group info, even though it contains much evidence queries are cheap", + 3, 1, 4.0, 16.0, 40.8, + fixture.autoscale()); + } + /** Tests with varying BCP group info parameters. */ @Test public void test_autoscaling_metrics() { diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index b59384f1493..6ef462f80c4 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -1135,12 +1135,12 @@ TEST("require that docsum matcher can extract matching elements from single attr EXPECT_EQUAL(list[1], 3u); } -struct GlobalFilterParamsFixture { +struct AttributeBlueprintParamsFixture { BlueprintFactory factory; search::fef::test::IndexEnvironment index_env; RankSetup rank_setup; Properties rank_properties; - GlobalFilterParamsFixture(double lower_limit, double upper_limit) + AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor) : factory(), index_env(), rank_setup(factory, index_env), @@ -1148,32 +1148,37 @@ struct GlobalFilterParamsFixture { { rank_setup.set_global_filter_lower_limit(lower_limit); rank_setup.set_global_filter_upper_limit(upper_limit); + rank_setup.set_target_hits_max_adjustment_factor(target_hits_max_adjustment_factor); } - void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit) { + void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit, + vespalib::stringref target_hits_max_adjustment_factor) { rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit); rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit); + rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor); } AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const { - return MatchToolsFactory::extract_global_filter_params(rank_setup, rank_properties, active_docids, docid_limit); + return MatchToolsFactory::extract_attribute_blueprint_params(rank_setup, rank_properties, active_docids, docid_limit); } }; -TEST_F("global filter params are extracted from rank profile", GlobalFilterParamsFixture(0.2, 0.8)) +TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0)) { auto params = f.extract(); EXPECT_EQUAL(0.2, params.global_filter_lower_limit); EXPECT_EQUAL(0.8, params.global_filter_upper_limit); + EXPECT_EQUAL(5.0, params.target_hits_max_adjustment_factor); } -TEST_F("global filter params are extracted from query", GlobalFilterParamsFixture(0.2, 0.8)) +TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0)) { - f.set_query_properties("0.15", "0.75"); + f.set_query_properties("0.15", "0.75", "3.0"); auto params = f.extract(); EXPECT_EQUAL(0.15, params.global_filter_lower_limit); EXPECT_EQUAL(0.75, params.global_filter_upper_limit); + EXPECT_EQUAL(3.0, params.target_hits_max_adjustment_factor); } -TEST_F("global filter params are scaled with active hit ratio", GlobalFilterParamsFixture(0.2, 0.8)) +TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0)) { auto params = f.extract(5, 10); EXPECT_EQUAL(0.12, params.global_filter_lower_limit); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index c7cbdc29689..a353d4816f6 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -176,11 +176,11 @@ MatchToolsFactory(QueryLimiter & queryLimiter, const search::IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard, bool is_search) : _queryLimiter(queryLimiter), - _global_filter_params(extract_global_filter_params(rankSetup, rankProperties, metaStore.getNumActiveLids(), searchContext.getDocIdLimit())), + _attribute_blueprint_params(extract_attribute_blueprint_params(rankSetup, rankProperties, metaStore.getNumActiveLids(), searchContext.getDocIdLimit())), _query(), _match_limiter(), _queryEnv(indexEnv, attributeContext, rankProperties, searchContext.getIndexes()), - _requestContext(doom, attributeContext, _queryEnv, _queryEnv.getObjectStore(), _global_filter_params, metaStoreReadGuard), + _requestContext(doom, attributeContext, _queryEnv, _queryEnv.getObjectStore(), _attribute_blueprint_params, metaStoreReadGuard), _mdl(), _rankSetup(rankSetup), _featureOverrides(featureOverrides), @@ -203,8 +203,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.fetchPostings(); if (is_search) { _query.handle_global_filter(searchContext.getDocIdLimit(), - _global_filter_params.global_filter_lower_limit, - _global_filter_params.global_filter_upper_limit, + _attribute_blueprint_params.global_filter_lower_limit, + _attribute_blueprint_params.global_filter_upper_limit, thread_bundle, trace); } _query.freeze(); @@ -324,18 +324,20 @@ MatchToolsFactory::get_feature_rename_map() const } AttributeBlueprintParams -MatchToolsFactory::extract_global_filter_params(const RankSetup& rank_setup, const Properties& rank_properties, - uint32_t active_docids, uint32_t docid_limit) +MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setup, const Properties& rank_properties, + uint32_t active_docids, uint32_t docid_limit) { double lower_limit = GlobalFilterLowerLimit::lookup(rank_properties, rank_setup.get_global_filter_lower_limit()); double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit()); + double target_hits_max_adjustment_factor = TargetHitsMaxAdjustmentFactor::lookup(rank_properties, rank_setup.get_target_hits_max_adjustment_factor()); // Note that we count the reserved docid 0 as active. // This ensures that when searchable-copies=1, the ratio is 1.0. double active_hit_ratio = std::min(active_docids + 1, docid_limit) / static_cast<double>(docid_limit); return {lower_limit * active_hit_ratio, - upper_limit * active_hit_ratio}; + upper_limit * active_hit_ratio, + target_hits_max_adjustment_factor}; } AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext, diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index db30ea8d2b2..681690d4c36 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -121,7 +121,7 @@ private: using IIndexEnvironment = search::fef::IIndexEnvironment; using IDiversifier = search::queryeval::IDiversifier; QueryLimiter & _queryLimiter; - AttributeBlueprintParams _global_filter_params; + AttributeBlueprintParams _attribute_blueprint_params; Query _query; MaybeMatchPhaseLimiter::UP _match_limiter; std::unique_ptr<RangeQueryLocator> _rangeLocator; @@ -177,15 +177,15 @@ public: const StringStringMap & get_feature_rename_map() const; /** - * Extracts global filter parameters from the rank-profile and query. + * Extracts attribute blueprint parameters from the rank-profile and query. * - * These parameters are expected to be in the range [0.0, 1.0], which matches the range of the estimated hit ratio of the query. + * The global filter parameters are expected to be in the range [0.0, 1.0], which matches the range of the estimated hit ratio of the query. * When searchable-copies > 1, we must scale the parameters to match the effective range of the estimated hit ratio. * This is done by multiplying with the active hit ratio (active docids / docid limit). */ static AttributeBlueprintParams - extract_global_filter_params(const RankSetup& rank_setup, const Properties& rank_properties, - uint32_t active_docids, uint32_t docid_limit); + extract_attribute_blueprint_params(const RankSetup& rank_setup, const Properties& rank_properties, + uint32_t active_docids, uint32_t docid_limit); }; } diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 6ca7d298ee2..0475f8462fc 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -1320,15 +1320,16 @@ public: return *_query_tensor; } - std::unique_ptr<NearestNeighborBlueprint> make_blueprint(bool approximate = true, double global_filter_lower_limit = 0.05) { + std::unique_ptr<NearestNeighborBlueprint> make_blueprint(bool approximate = true, + double global_filter_lower_limit = 0.05, + double target_hits_max_adjustment_factor = 20.0) { search::queryeval::FieldSpec field("foo", 0, 0); auto bp = std::make_unique<NearestNeighborBlueprint>( field, std::make_unique<DistanceCalculator>(this->as_dense_tensor(), create_query_tensor(vec_2d(17, 42))), - 3, approximate, 5, - 100100.25, - global_filter_lower_limit, 1.0, _no_doom.get_doom()); + 3, approximate, 5, 100100.25, + global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, _no_doom.get_doom()); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold()); return bp; @@ -1362,6 +1363,19 @@ TEST_F("NN blueprint handles empty filter (post-filtering)", NearestNeighborBlue EXPECT_EQUAL(NNBA::INDEX_TOP_K, bp->get_algorithm()); } +TEST_F("NN blueprint adjustment of targetHits is bound (post-filtering)", NearestNeighborBlueprintFixture) +{ + auto bp = f.make_blueprint(true, 0.05, 3.5); + auto empty_filter = GlobalFilter::create(); + bp->set_global_filter(*empty_filter, 0.2); + // targetHits is adjusted based on the estimated hit ratio of the query, + // but bound by target-hits-max-adjustment-factor + EXPECT_EQUAL(3u, bp->get_target_hits()); + EXPECT_EQUAL(10u, bp->get_adjusted_target_hits()); + EXPECT_EQUAL(10u, bp->getState().estimate().estHits); + EXPECT_EQUAL(NNBA::INDEX_TOP_K, bp->get_algorithm()); +} + TEST_F("NN blueprint handles strong filter (pre-filtering)", NearestNeighborBlueprintFixture) { auto bp = f.make_blueprint(); diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp index 50d9d36f575..f708df0a862 100644 --- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp +++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp @@ -533,6 +533,9 @@ void RankSetupTest::testRankSetup() env.getProperties().add(mutate::on_second_phase::Operation::NAME, "=7"); env.getProperties().add(mutate::on_summary::Attribute::NAME, "c"); env.getProperties().add(mutate::on_summary::Operation::NAME, "-=2"); + env.getProperties().add(matching::GlobalFilterLowerLimit::NAME, "0.3"); + env.getProperties().add(matching::GlobalFilterUpperLimit::NAME, "0.7"); + env.getProperties().add(matching::TargetHitsMaxAdjustmentFactor::NAME, "5.0"); RankSetup rs(_factory, env); EXPECT_FALSE(rs.has_match_features()); @@ -571,7 +574,9 @@ void RankSetupTest::testRankSetup() EXPECT_EQUAL(rs.getMutateOnSecondPhase()._operation, "=7"); EXPECT_EQUAL(rs.getMutateOnSummary()._attribute, "c"); EXPECT_EQUAL(rs.getMutateOnSummary()._operation, "-=2"); - + EXPECT_EQUAL(rs.get_global_filter_lower_limit(), 0.3); + EXPECT_EQUAL(rs.get_global_filter_upper_limit(), 0.7); + EXPECT_EQUAL(rs.get_target_hits_max_adjustment_factor(), 5.0); } bool diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index be631be6dca..453b7b321b9 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -842,14 +842,16 @@ public: } try { auto calc = tensor::DistanceCalculator::make_with_validation(_attr, *query_tensor); + const auto& params = getRequestContext().get_attribute_blueprint_params(); setResult(std::make_unique<queryeval::NearestNeighborBlueprint>(_field, std::move(calc), n.get_target_num_hits(), n.get_allow_approximate(), n.get_explore_additional_hits(), n.get_distance_threshold(), - getRequestContext().get_attribute_blueprint_params().global_filter_lower_limit, - getRequestContext().get_attribute_blueprint_params().global_filter_upper_limit, + params.global_filter_lower_limit, + params.global_filter_upper_limit, + params.target_hits_max_adjustment_factor, getRequestContext().getDoom())); } catch (const vespalib::IllegalArgumentException& ex) { return fail_nearest_neighbor_term(n, ex.getMessage()); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h index 39f58c5382e..64213235c23 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h @@ -13,17 +13,21 @@ struct AttributeBlueprintParams { double global_filter_lower_limit; double global_filter_upper_limit; + double target_hits_max_adjustment_factor; AttributeBlueprintParams(double global_filter_lower_limit_in, - double global_filter_upper_limit_in) + double global_filter_upper_limit_in, + double target_hits_max_adjustment_factor_in) : global_filter_lower_limit(global_filter_lower_limit_in), - global_filter_upper_limit(global_filter_upper_limit_in) + global_filter_upper_limit(global_filter_upper_limit_in), + target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in) { } AttributeBlueprintParams() : AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE, - fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE) + fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE, + fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE) { } }; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 8be44ce0a0c..7871e66970e 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -422,6 +422,22 @@ GlobalFilterUpperLimit::lookup(const Properties &props, double defaultValue) return lookupDouble(props, NAME, defaultValue); } +const vespalib::string TargetHitsMaxAdjustmentFactor::NAME("vespa.matching.nns.target_hits_max_adjustment_factor"); + +const double TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE(20.0); + +double +TargetHitsMaxAdjustmentFactor::lookup(const Properties& props) +{ + return lookup(props, DEFAULT_VALUE); +} + +double +TargetHitsMaxAdjustmentFactor::lookup(const Properties& props, double defaultValue) +{ + return lookupDouble(props, NAME, defaultValue); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index f538e7bef2e..4f38a27d3fe 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -313,6 +313,21 @@ namespace matching { static double lookup(const Properties &props); static double lookup(const Properties &props, double defaultValue); }; + + /** + * Property to control the auto-adjustment of targetHits in a nearestNeighbor search using HNSW index with post-filtering. + * + * The targetHits is auto-adjusted in an effort to expose targetHits hits to first-phase ranking after post-filtering: + * adjustedTargetHits = min(targetHits / estimatedHitRatio, targetHits * targetHitsMaxAdjustmentFactor). + * + * This property ensures an upper bound of adjustedTargetHits, avoiding that the search in the HNSW index takes too long. + **/ + struct TargetHitsMaxAdjustmentFactor { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + static double lookup(const Properties &props, double defaultValue); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 823e39199df..9d4e547feef 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -68,6 +68,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _softTimeoutTailCost(0.1), _global_filter_lower_limit(0.0), _global_filter_upper_limit(1.0), + _target_hits_max_adjustment_factor(20.0), _mutateOnMatch(), _mutateOnFirstPhase(), _mutateOnSecondPhase(), @@ -121,6 +122,7 @@ RankSetup::configure() setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties())); set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties())); set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties())); + set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties())); _mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties()); _mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties()); _mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties()); diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 832b86d042a..72432c2ed8a 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -76,6 +76,7 @@ private: double _softTimeoutTailCost; double _global_filter_lower_limit; double _global_filter_upper_limit; + double _target_hits_max_adjustment_factor; MutateOperation _mutateOnMatch; MutateOperation _mutateOnFirstPhase; MutateOperation _mutateOnSecondPhase; @@ -393,6 +394,8 @@ public: double get_global_filter_lower_limit() const { return _global_filter_lower_limit; } void set_global_filter_upper_limit(double v) { _global_filter_upper_limit = v; } double get_global_filter_upper_limit() const { return _global_filter_upper_limit; } + void set_target_hits_max_adjustment_factor(double v) { _target_hits_max_adjustment_factor = v; } + double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; } /** * This method may be used to indicate that certain features diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 62937129f37..a70f387100b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -43,6 +43,7 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f double distance_threshold, double global_filter_lower_limit, double global_filter_upper_limit, + double target_hits_max_adjustment_factor, const vespalib::Doom& doom) : ComplexLeafBlueprint(field), _distance_calc(std::move(distance_calc)), @@ -55,6 +56,7 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _distance_threshold(std::numeric_limits<double>::max()), _global_filter_lower_limit(global_filter_lower_limit), _global_filter_upper_limit(global_filter_upper_limit), + _target_hits_max_adjustment_factor(target_hits_max_adjustment_factor), _distance_heap(target_hits), _found_hits(), _algorithm(Algorithm::EXACT), @@ -95,8 +97,10 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter, d } else { // post-filtering case // The goal is to expose 'targetHits' hits to first-phase ranking. // We try to achieve this by adjusting targetHits based on the estimated hit ratio of the query before post-filtering. + // However, this is bound by 'target-hits-max-adjustment-factor' to limit the cost of searching the HNSW index. if (estimated_hit_ratio > 0.0) { - _adjusted_target_hits = static_cast<double>(_target_hits) / estimated_hit_ratio; + _adjusted_target_hits = std::min(static_cast<double>(_target_hits) / estimated_hit_ratio, + static_cast<double>(_target_hits) * _target_hits_max_adjustment_factor); } } if (_algorithm != Algorithm::EXACT_FALLBACK) { diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index f88cdd5adb1..174f0b23125 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -38,6 +38,7 @@ private: double _distance_threshold; double _global_filter_lower_limit; double _global_filter_upper_limit; + double _target_hits_max_adjustment_factor; mutable NearestNeighborDistanceHeap _distance_heap; std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits; Algorithm _algorithm; @@ -55,6 +56,7 @@ public: double distance_threshold, double global_filter_lower_limit, double global_filter_upper_limit, + double target_hits_max_adjustment_factor, const vespalib::Doom& doom); NearestNeighborBlueprint(const NearestNeighborBlueprint&) = delete; NearestNeighborBlueprint& operator=(const NearestNeighborBlueprint&) = delete; diff --git a/storage/src/tests/distributor/btree_bucket_database_test.cpp b/storage/src/tests/distributor/btree_bucket_database_test.cpp index 14d5a4142a8..40575cacfba 100644 --- a/storage/src/tests/distributor/btree_bucket_database_test.cpp +++ b/storage/src/tests/distributor/btree_bucket_database_test.cpp @@ -19,15 +19,15 @@ using document::BucketId; namespace { -BucketCopy BC(uint32_t node_idx, uint32_t state) { +BucketCopy BC(uint16_t node_idx, uint32_t state) { api::BucketInfo info(0x123, state, state); - return BucketCopy(0, node_idx, info); + return {0, node_idx, info}; } BucketInfo BI(uint32_t node_idx, uint32_t state) { BucketInfo bi; - bi.addNode(BC(node_idx, state), toVector<uint16_t>(0)); + bi.addNode(BC(node_idx, state), {0}); return bi; } diff --git a/storage/src/tests/distributor/bucketdatabasetest.cpp b/storage/src/tests/distributor/bucketdatabasetest.cpp index fcc64e0cccf..032b8ad8a9c 100644 --- a/storage/src/tests/distributor/bucketdatabasetest.cpp +++ b/storage/src/tests/distributor/bucketdatabasetest.cpp @@ -1,9 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "bucketdatabasetest.h" +#include <vespa/storage/storageutil/utils.h> #include <vespa/vespalib/util/benchmark_timer.h> #include <chrono> -#include <iomanip> #include <algorithm> namespace storage::distributor { @@ -16,21 +16,21 @@ void BucketDatabaseTest::SetUp() { namespace { -BucketCopy BC(uint32_t nodeIdx) { +BucketCopy BC(uint16_t nodeIdx) { return BucketCopy(0, nodeIdx, api::BucketInfo()); } -BucketInfo BI(uint32_t nodeIdx) { +BucketInfo BI(uint16_t nodeIdx) { BucketInfo bi; - bi.addNode(BC(nodeIdx), toVector<uint16_t>(0)); + bi.addNode(BC(nodeIdx), {0}); return bi; } -BucketInfo BI3(uint32_t node0, uint32_t node1, uint32_t node2) { +BucketInfo BI3(uint16_t node0, uint16_t node1, uint16_t node2) { BucketInfo bi; - bi.addNode(BC(node0), toVector<uint16_t>(node0, node1, node2)); - bi.addNode(BC(node1), toVector<uint16_t>(node0, node1, node2)); - bi.addNode(BC(node2), toVector<uint16_t>(node0, node1, node2)); + bi.addNode(BC(node0), {node0, node1, node2}); + bi.addNode(BC(node1), {node0, node1, node2}); + bi.addNode(BC(node2), {node0, node1, node2}); return bi; } diff --git a/storage/src/tests/distributor/bucketdatabasetest.h b/storage/src/tests/distributor/bucketdatabasetest.h index 33f914f8fd2..f24a62728d3 100644 --- a/storage/src/tests/distributor/bucketdatabasetest.h +++ b/storage/src/tests/distributor/bucketdatabasetest.h @@ -2,7 +2,6 @@ #pragma once #include <vespa/storage/bucketdb/bucketdatabase.h> -#include <vespa/storage/storageutil/utils.h> #include <vespa/vespalib/gtest/gtest.h> #include <functional> @@ -11,19 +10,14 @@ namespace storage::distributor { struct BucketDatabaseTest : public ::testing::TestWithParam<std::shared_ptr<BucketDatabase>> { void SetUp() override ; - std::string doFindParents(const std::vector<document::BucketId>& ids, - const document::BucketId& searchId); - std::string doFindAll(const std::vector<document::BucketId>& ids, - const document::BucketId& searchId); + std::string doFindParents(const std::vector<document::BucketId>& ids, const document::BucketId& searchId); + std::string doFindAll(const std::vector<document::BucketId>& ids, const document::BucketId& searchId); document::BucketId doCreate(const std::vector<document::BucketId>& ids, - uint32_t minBits, - const document::BucketId& wantedId); + uint32_t minBits, const document::BucketId& wantedId); BucketDatabase& db() noexcept { return *GetParam(); } - using UBoundFunc = std::function< - document::BucketId(const BucketDatabase&, - const document::BucketId&)>; + using UBoundFunc = std::function<document::BucketId(const BucketDatabase&, const document::BucketId&)>; void doTestUpperBound(const UBoundFunc& f); }; diff --git a/storage/src/tests/distributor/bucketstateoperationtest.cpp b/storage/src/tests/distributor/bucketstateoperationtest.cpp index 42ee4675e26..c9fab0b37e5 100644 --- a/storage/src/tests/distributor/bucketstateoperationtest.cpp +++ b/storage/src/tests/distributor/bucketstateoperationtest.cpp @@ -3,6 +3,7 @@ #include <tests/distributor/distributor_stripe_test_util.h> #include <vespa/storage/distributor/operations/idealstate/setbucketstateoperation.h> #include <vespa/storage/distributor/top_level_distributor.h> +#include <vespa/storage/storageutil/utils.h> #include <vespa/document/test/make_document_bucket.h> #include <vespa/vespalib/gtest/gtest.h> #include "dummy_cluster_context.h" diff --git a/storage/src/tests/distributor/distributor_bucket_space_test.cpp b/storage/src/tests/distributor/distributor_bucket_space_test.cpp index 3ea4c1ca3c2..00bc803e81c 100644 --- a/storage/src/tests/distributor/distributor_bucket_space_test.cpp +++ b/storage/src/tests/distributor/distributor_bucket_space_test.cpp @@ -100,10 +100,10 @@ DistributorBucketSpaceTest::CountVector DistributorBucketSpaceTest::count_service_layer_buckets(const std::vector<BucketId>& buckets) { CountVector result(3); - std::vector<uint16_t> ideal_nodes; for (auto& bucket : buckets) { const auto & ideal_nodes_bundle = bucket_space.get_ideal_service_layer_nodes_bundle(bucket); for (uint32_t i = 0; i < 3; ++i) { + IdealServiceLayerNodesBundle::ConstNodesRef ideal_nodes; switch (i) { case 0: ideal_nodes = ideal_nodes_bundle.available_nodes(); diff --git a/storage/src/tests/distributor/distributor_stripe_test_util.cpp b/storage/src/tests/distributor/distributor_stripe_test_util.cpp index 6ececa39583..5babde49380 100644 --- a/storage/src/tests/distributor/distributor_stripe_test_util.cpp +++ b/storage/src/tests/distributor/distributor_stripe_test_util.cpp @@ -10,6 +10,7 @@ #include <vespa/storage/distributor/distributormetricsset.h> #include <vespa/storage/distributor/ideal_state_total_metrics.h> #include <vespa/storage/distributor/node_supported_features_repo.h> +#include <vespa/storage/storageutil/utils.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vespalib/text/stringtokenizer.h> #include <vespa/vespalib/stllike/hash_map.hpp> @@ -225,7 +226,7 @@ DistributorStripeTestUtil::addIdealNodes(const lib::ClusterState& state, const d for (uint32_t i = 0; i < res.size(); ++i) { if (state.getNodeState(lib::Node(lib::NodeType::STORAGE, res[i])).getState() != lib::State::MAINTENANCE) { - entry->addNode(BucketCopy(0, res[i], api::BucketInfo(1,1,1)), toVector<uint16_t>(0)); + entry->addNode(BucketCopy(0, res[i], api::BucketInfo(1,1,1)), {0}); } } @@ -324,7 +325,7 @@ DistributorStripeTestUtil::insertBucketInfo(document::BucketId id, uint16_t node info2.setActive(); } BucketCopy copy(operation_context().generate_unique_timestamp(), node, info2); - entry->addNode(copy.setTrusted(trusted), toVector<uint16_t>(0)); + entry->addNode(copy.setTrusted(trusted), {0}); getBucketDatabase().update(entry); } diff --git a/storage/src/tests/distributor/distributor_stripe_test_util.h b/storage/src/tests/distributor/distributor_stripe_test_util.h index 9963b2c96b4..272301bf4a6 100644 --- a/storage/src/tests/distributor/distributor_stripe_test_util.h +++ b/storage/src/tests/distributor/distributor_stripe_test_util.h @@ -7,6 +7,7 @@ #include <tests/common/teststorageapp.h> #include <vespa/storage/common/hostreporter/hostinfo.h> #include <vespa/storage/distributor/stripe_host_info_notifier.h> +#include <vespa/storage/storageutil/utils.h> namespace storage { diff --git a/storage/src/tests/distributor/garbagecollectiontest.cpp b/storage/src/tests/distributor/garbagecollectiontest.cpp index c2f4836f4cb..9b5056f2066 100644 --- a/storage/src/tests/distributor/garbagecollectiontest.cpp +++ b/storage/src/tests/distributor/garbagecollectiontest.cpp @@ -71,8 +71,7 @@ struct GarbageCollectionOperationTest : Test, DistributorStripeTestUtil { std::shared_ptr<GarbageCollectionOperation> create_op() { auto op = std::make_shared<GarbageCollectionOperation>( - dummy_cluster_context, BucketAndNodes(makeDocumentBucket(_bucket_id), - toVector<uint16_t>(0, 1))); + dummy_cluster_context, BucketAndNodes(makeDocumentBucket(_bucket_id), {0, 1})); op->setIdealStateManager(&getIdealStateManager()); return op; } diff --git a/storage/src/tests/distributor/operationtargetresolvertest.cpp b/storage/src/tests/distributor/operationtargetresolvertest.cpp index 2d41b0f4d32..19ca81e933f 100644 --- a/storage/src/tests/distributor/operationtargetresolvertest.cpp +++ b/storage/src/tests/distributor/operationtargetresolvertest.cpp @@ -3,7 +3,6 @@ #include <tests/distributor/distributor_stripe_test_util.h> #include <vespa/config/helper/configgetter.h> #include <vespa/config/helper/configgetter.hpp> -#include <vespa/document/config/config-documenttypes.h> #include <vespa/document/repo/documenttyperepo.h> #include <vespa/document/test/make_bucket_space.h> #include <vespa/document/test/make_document_bucket.h> @@ -14,7 +13,6 @@ #include <vespa/storageapi/message/bucket.h> #include <vespa/storageapi/message/persistence.h> #include <vespa/vdslib/distribution/distribution.h> -#include <vespa/vdslib/distribution/idealnodecalculatorimpl.h> #include <vespa/vespalib/gtest/gtest.h> using document::BucketId; @@ -112,14 +110,10 @@ struct TestTargets { } // anonymous BucketInstanceList -OperationTargetResolverTest::getInstances(const BucketId& id, - bool stripToRedundancy) +OperationTargetResolverTest::getInstances(const BucketId& id, bool stripToRedundancy) { - lib::IdealNodeCalculatorImpl idealNodeCalc; auto &bucketSpaceRepo(operation_context().bucket_space_repo()); auto &distributorBucketSpace(bucketSpaceRepo.get(makeBucketSpace())); - idealNodeCalc.setDistribution(distributorBucketSpace.getDistribution()); - idealNodeCalc.setClusterState(distributorBucketSpace.getClusterState()); OperationTargetResolverImpl resolver( distributorBucketSpace, distributorBucketSpace.getBucketDatabase(), 16, distributorBucketSpace.getDistribution().getRedundancy(), @@ -142,24 +136,6 @@ TEST_F(OperationTargetResolverTest, simple) { .sendsTo(BucketId(16, 0), 0); } -TEST_F(OperationTargetResolverTest, multiple_nodes) { - setup_stripe(1, 2, "storage:2 distributor:1"); - - auto &bucketSpaceRepo(operation_context().bucket_space_repo()); - auto &distributorBucketSpace(bucketSpaceRepo.get(makeBucketSpace())); - for (int i = 0; i < 100; ++i) { - addNodesToBucketDB(BucketId(16, i), "0=0,1=0"); - - lib::IdealNodeCalculatorImpl idealNodeCalc; - idealNodeCalc.setDistribution(distributorBucketSpace.getDistribution()); - idealNodeCalc.setClusterState(distributorBucketSpace.getClusterState()); - lib::IdealNodeList idealNodes( - idealNodeCalc.getIdealStorageNodes(BucketId(16, i))); - uint16_t expectedNode = idealNodes[0].getIndex(); - MY_ASSERT_THAT(BucketId(32, i)).sendsTo(BucketId(16, i), expectedNode); - } -} - TEST_F(OperationTargetResolverTest, choose_ideal_state_when_many_copies) { setup_stripe(2, 4, "storage:4 distributor:1"); addNodesToBucketDB(BucketId(16, 0), "0=0,1=0,2=0,3=0"); // ideal nodes: 1, 3 diff --git a/storage/src/tests/distributor/simplemaintenancescannertest.cpp b/storage/src/tests/distributor/simplemaintenancescannertest.cpp index b5dc72d995b..3d3c58ba842 100644 --- a/storage/src/tests/distributor/simplemaintenancescannertest.cpp +++ b/storage/src/tests/distributor/simplemaintenancescannertest.cpp @@ -82,7 +82,7 @@ TEST_F(SimpleMaintenanceScannerTest, prioritize_single_bucket) { TEST_F(SimpleMaintenanceScannerTest, prioritize_single_bucket_alt_bucket_space) { document::BucketSpace bucketSpace(4); _bucketSpaceRepo->add(bucketSpace, std::make_unique<DistributorBucketSpace>()); - _scanner->reset(); + (void)_scanner->fetch_and_reset(); addBucketToDb(bucketSpace, 1); std::string expected("PrioritizedBucket(Bucket(BucketSpace(0x0000000000000004), BucketId(0x4000000000000001)), pri VERY_HIGH)\n"); @@ -148,7 +148,7 @@ TEST_F(SimpleMaintenanceScannerTest, reset) { ASSERT_TRUE(scanEntireDatabase(0)); EXPECT_EQ(expected, _priorityDb->toString()); - _scanner->reset(); + (void)_scanner->fetch_and_reset(); ASSERT_TRUE(scanEntireDatabase(3)); expected = "PrioritizedBucket(Bucket(BucketSpace(0x0000000000000001), BucketId(0x4000000000000001)), pri VERY_HIGH)\n" @@ -180,7 +180,7 @@ TEST_F(SimpleMaintenanceScannerTest, pending_maintenance_operation_statistics) { EXPECT_EQ(expected, stringifyGlobalPendingStats(stats)); } - _scanner->reset(); + (void)_scanner->fetch_and_reset(); { const auto & stats = _scanner->getPendingMaintenanceStats(); EXPECT_EQ(expectedEmpty, stringifyGlobalPendingStats(stats)); @@ -301,7 +301,7 @@ TEST_F(SimpleMaintenanceScannerTest, merge_pending_maintenance_stats) { TEST_F(SimpleMaintenanceScannerTest, empty_bucket_db_is_immediately_done_by_default) { auto res = _scanner->scanNext(); EXPECT_TRUE(res.isDone()); - _scanner->reset(); + (void)_scanner->fetch_and_reset(); res = _scanner->scanNext(); EXPECT_TRUE(res.isDone()); } diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index 4ca4d70a816..13c982f5a77 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -7,6 +7,7 @@ #include <vespa/document/test/make_document_bucket.h> #include <vespa/storage/distributor/top_level_bucket_db_updater.h> #include <vespa/storage/distributor/top_level_distributor.h> +#include <vespa/storage/distributor/activecopy.h> #include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/storage/distributor/distributor_stripe.h> #include <vespa/storage/distributor/operations/idealstate/mergeoperation.h> @@ -1587,7 +1588,9 @@ TEST_F(StateCheckersTest, context_populates_ideal_state_containers) { StateChecker::Context c(node_context(), operation_context(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket({17, 0})); - ASSERT_THAT(c.idealState(), ElementsAre(1, 3)); + ASSERT_EQ(2, c.idealState().size()); + ASSERT_EQ(1, c.idealState()[0]); + ASSERT_EQ(3, c.idealState()[1]); for (uint16_t node : c.idealState()) { ASSERT_TRUE(c.idealStateBundle.is_nonretired_or_maintenance(node)); } @@ -1736,4 +1739,9 @@ TEST_F(StateCheckersTest, stats_updates_for_maximum_time_since_gc_run) { EXPECT_EQ(runner.stats().max_observed_time_since_last_gc(), 1900s); } +TEST(ActiveCopyTest, control_size) { + EXPECT_EQ(12, sizeof(ActiveCopy)); + EXPECT_EQ(64, sizeof(IdealServiceLayerNodesBundle)); +} + } diff --git a/storage/src/tests/distributor/top_level_distributor_test_util.cpp b/storage/src/tests/distributor/top_level_distributor_test_util.cpp index 9859a6fb237..6bbe7a47da2 100644 --- a/storage/src/tests/distributor/top_level_distributor_test_util.cpp +++ b/storage/src/tests/distributor/top_level_distributor_test_util.cpp @@ -10,6 +10,7 @@ #include <vespa/storage/distributor/distributor_stripe_pool.h> #include <vespa/storage/distributor/distributor_stripe_thread.h> #include <vespa/storage/distributor/distributor_total_metrics.h> +#include <vespa/storage/storageutil/utils.h> #include <vespa/storage/common/bucket_stripe_utils.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vespalib/text/stringtokenizer.h> diff --git a/storage/src/tests/distributor/top_level_distributor_test_util.h b/storage/src/tests/distributor/top_level_distributor_test_util.h index cd5db7c8f80..51700848733 100644 --- a/storage/src/tests/distributor/top_level_distributor_test_util.h +++ b/storage/src/tests/distributor/top_level_distributor_test_util.h @@ -7,7 +7,6 @@ #include <tests/common/teststorageapp.h> #include <vespa/storage/common/hostreporter/hostinfo.h> #include <vespa/storage/frameworkimpl/component/distributorcomponentregisterimpl.h> -#include <vespa/storage/storageutil/utils.h> #include <vespa/storageapi/message/state.h> #include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> diff --git a/storage/src/vespa/storage/common/distributorcomponent.h b/storage/src/vespa/storage/common/distributorcomponent.h index 06bb49a6090..6542bf2ddfe 100644 --- a/storage/src/vespa/storage/common/distributorcomponent.h +++ b/storage/src/vespa/storage/common/distributorcomponent.h @@ -34,13 +34,6 @@ namespace storage { -namespace bucketdb { - class DistrBucketDatabase; -} -namespace lib { - class IdealNodeCalculator; -} - using DistributorConfig = vespa::config::content::core::internal::InternalStorDistributormanagerType; using VisitorConfig = vespa::config::content::core::internal::InternalStorVisitordispatcherType; diff --git a/storage/src/vespa/storage/distributor/activecopy.cpp b/storage/src/vespa/storage/distributor/activecopy.cpp index 5d59d1a838f..4e3ef4f88ee 100644 --- a/storage/src/vespa/storage/distributor/activecopy.cpp +++ b/storage/src/vespa/storage/distributor/activecopy.cpp @@ -1,8 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "activecopy.h" - -#include <vespa/storage/storageutil/utils.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vespalib/stllike/asciistream.h> #include <algorithm> @@ -28,26 +26,9 @@ namespace storage::distributor { using IndexList = lib::Distribution::IndexList; -ActiveCopy::ActiveCopy(uint16_t node, const BucketDatabase::Entry& e, const std::vector<uint16_t>& idealState) - : _nodeIndex(node), - _ideal(0xffff) -{ - const BucketCopy* copy = e->getNode(node); - assert(copy != nullptr); - _doc_count = copy->getDocumentCount(); - _ready = copy->ready(); - _active = copy->active(); - for (uint32_t i=0; i<idealState.size(); ++i) { - if (idealState[i] == node) { - _ideal = i; - break; - } - } -} - vespalib::string ActiveCopy::getReason() const { - if (_ready && (_doc_count > 0) && (_ideal < 0xffff)) { + if (_ready && (_doc_count > 0) && valid_ideal()) { vespalib::asciistream ost; ost << "copy is ready, has " << _doc_count << " docs and ideal state priority " << _ideal; @@ -58,7 +39,7 @@ ActiveCopy::getReason() const { return ost.str(); } else if (_ready) { return "copy is ready"; - } else if ((_doc_count > 0) && (_ideal < 0xffff)) { + } else if ((_doc_count > 0) && valid_ideal()) { vespalib::asciistream ost; ost << "copy has " << _doc_count << " docs and ideal state priority " << _ideal; return ost.str(); @@ -68,7 +49,7 @@ ActiveCopy::getReason() const { return ost.str(); } else if (_active) { return "copy is already active"; - } else if (_ideal < 0xffff) { + } else if (valid_ideal()) { vespalib::asciistream ost; ost << "copy is ideal state priority " << _ideal; return ost.str(); @@ -86,7 +67,7 @@ operator<<(std::ostream& out, const ActiveCopy & e) { if (e._doc_count > 0) { out << ", doc_count " << e._doc_count; } - if (e._ideal < 0xffff) { + if (e.valid_ideal()) { out << ", ideal pri " << e._ideal; } out << ")"; @@ -95,26 +76,8 @@ operator<<(std::ostream& out, const ActiveCopy & e) { namespace { -struct ActiveStateOrder { - bool operator()(const ActiveCopy & e1, const ActiveCopy & e2) noexcept { - if (e1._ready != e2._ready) { - return e1._ready; - } - if (e1._doc_count != e2._doc_count) { - return e1._doc_count > e2._doc_count; - } - if (e1._ideal != e2._ideal) { - return e1._ideal < e2._ideal; - } - if (e1._active != e2._active) { - return e1._active; - } - return e1._nodeIndex < e2._nodeIndex; - } -}; - IndexList -buildValidNodeIndexList(BucketDatabase::Entry& e) { +buildValidNodeIndexList(const BucketDatabase::Entry& e) { IndexList result; result.reserve(e->getNodeCount()); for (uint32_t i=0, n=e->getNodeCount(); i < n; ++i) { @@ -126,22 +89,45 @@ buildValidNodeIndexList(BucketDatabase::Entry& e) { return result; } -std::vector<ActiveCopy> -buildNodeList(BucketDatabase::Entry& e,vespalib::ConstArrayRef<uint16_t> nodeIndexes, const std::vector<uint16_t>& idealState) +using SmallActiveCopyList = vespalib::SmallVector<ActiveCopy, 2>; +static_assert(sizeof(SmallActiveCopyList) == 40); + +SmallActiveCopyList +buildNodeList(const BucketDatabase::Entry& e,vespalib::ConstArrayRef<uint16_t> nodeIndexes, const IdealServiceLayerNodesBundle::Node2Index & idealState) { - std::vector<ActiveCopy> result; + SmallActiveCopyList result; result.reserve(nodeIndexes.size()); for (uint16_t nodeIndex : nodeIndexes) { - result.emplace_back(nodeIndex, e, idealState); + const BucketCopy *copy = e->getNode(nodeIndex); + assert(copy); + result.emplace_back(nodeIndex, *copy, idealState.lookup(nodeIndex)); } return result; } } +struct ActiveStateOrder { + bool operator()(const ActiveCopy & e1, const ActiveCopy & e2) noexcept { + if (e1._ready != e2._ready) { + return e1._ready; + } + if (e1._doc_count != e2._doc_count) { + return e1._doc_count > e2._doc_count; + } + if (e1._ideal != e2._ideal) { + return e1._ideal < e2._ideal; + } + if (e1._active != e2._active) { + return e1._active; + } + return e1.nodeIndex() < e2.nodeIndex(); + } +}; + ActiveList -ActiveCopy::calculate(const std::vector<uint16_t>& idealState, const lib::Distribution& distribution, - BucketDatabase::Entry& e, uint32_t max_activation_inhibited_out_of_sync_groups) +ActiveCopy::calculate(const Node2Index & idealState, const lib::Distribution& distribution, + const BucketDatabase::Entry& e, uint32_t max_activation_inhibited_out_of_sync_groups) { IndexList validNodesWithCopy = buildValidNodeIndexList(e); if (validNodesWithCopy.empty()) { @@ -161,7 +147,7 @@ ActiveCopy::calculate(const std::vector<uint16_t>& idealState, const lib::Distri : api::BucketInfo()); // Invalid by default uint32_t inhibited_groups = 0; for (const auto& group_nodes : groups) { - std::vector<ActiveCopy> entries = buildNodeList(e, group_nodes, idealState); + SmallActiveCopyList entries = buildNodeList(e, group_nodes, idealState); auto best = std::min_element(entries.begin(), entries.end(), ActiveStateOrder()); if ((groups.size() > 1) && (inhibited_groups < max_activation_inhibited_out_of_sync_groups) && @@ -179,24 +165,22 @@ ActiveCopy::calculate(const std::vector<uint16_t>& idealState, const lib::Distri } void -ActiveList::print(std::ostream& out, bool verbose, - const std::string& indent) const +ActiveList::print(std::ostream& out, bool verbose, const std::string& indent) const { out << "["; if (verbose) { for (size_t i=0; i<_v.size(); ++i) { - out << "\n" << indent << " " - << _v[i]._nodeIndex << " " << _v[i].getReason(); + out << "\n" << indent << " " << _v[i].nodeIndex() << " " << _v[i].getReason(); } if (!_v.empty()) { out << "\n" << indent; } } else { if (!_v.empty()) { - out << _v[0]._nodeIndex; + out << _v[0].nodeIndex(); } for (size_t i=1; i<_v.size(); ++i) { - out << " " << _v[i]._nodeIndex; + out << " " << _v[i].nodeIndex(); } } out << "]"; @@ -206,7 +190,7 @@ bool ActiveList::contains(uint16_t node) const noexcept { for (const auto& candidate : _v) { - if (node == candidate._nodeIndex) { + if (node == candidate.nodeIndex()) { return true; } } diff --git a/storage/src/vespa/storage/distributor/activecopy.h b/storage/src/vespa/storage/distributor/activecopy.h index 258fe3cdf16..a2de77306be 100644 --- a/storage/src/vespa/storage/distributor/activecopy.h +++ b/storage/src/vespa/storage/distributor/activecopy.h @@ -2,25 +2,43 @@ #pragma once +#include "ideal_service_layer_nodes_bundle.h" #include <vespa/storage/bucketdb/bucketdatabase.h> namespace storage::lib { class Distribution; } namespace storage::distributor { class ActiveList; +struct ActiveStateOrder; -struct ActiveCopy { - constexpr ActiveCopy() noexcept : _nodeIndex(-1), _ideal(-1), _doc_count(0), _ready(false), _active(false) { } - ActiveCopy(uint16_t node, const BucketDatabase::Entry& e, const std::vector<uint16_t>& idealState); +class ActiveCopy { + using Index = IdealServiceLayerNodesBundle::Index; + using Node2Index = IdealServiceLayerNodesBundle::Node2Index; +public: + constexpr ActiveCopy() noexcept + : _nodeIndex(Index::invalid()), + _ideal(Index::invalid()), + _doc_count(0), + _ready(false), + _active(false) + { } + ActiveCopy(uint16_t node, const BucketCopy & copy, uint16_t ideal) noexcept + : _nodeIndex(node), + _ideal(ideal), + _doc_count(copy.getDocumentCount()), + _ready(copy.ready()), + _active(copy.active()) + { } vespalib::string getReason() const; friend std::ostream& operator<<(std::ostream& out, const ActiveCopy& e); - static ActiveList calculate(const std::vector<uint16_t>& idealState, - const lib::Distribution&, - BucketDatabase::Entry&, - uint32_t max_activation_inhibited_out_of_sync_groups); - + static ActiveList calculate(const Node2Index & idealState, const lib::Distribution&, + const BucketDatabase::Entry&, uint32_t max_activation_inhibited_out_of_sync_groups); + uint16_t nodeIndex() const noexcept { return _nodeIndex; } +private: + friend ActiveStateOrder; + bool valid_ideal() const noexcept { return _ideal < Index::invalid(); } uint16_t _nodeIndex; uint16_t _ideal; uint32_t _doc_count; @@ -29,8 +47,6 @@ struct ActiveCopy { }; class ActiveList : public vespalib::Printable { - std::vector<ActiveCopy> _v; - public: ActiveList() {} ActiveList(std::vector<ActiveCopy>&& v) : _v(std::move(v)) { } @@ -41,6 +57,8 @@ public: [[nodiscard]] bool empty() const noexcept { return _v.empty(); } size_t size() const noexcept { return _v.size(); } void print(std::ostream&, bool verbose, const std::string& indent) const override; +private: + std::vector<ActiveCopy> _v; }; } diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp index 299aaffb569..7ba9c67b156 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp @@ -121,9 +121,9 @@ setup_ideal_nodes_bundle(IdealServiceLayerNodesBundle& ideal_nodes_bundle, const lib::ClusterState& cluster_state, document::BucketId bucket) { - ideal_nodes_bundle.set_available_nodes(distribution.getIdealStorageNodes(cluster_state, bucket, up_states)); - ideal_nodes_bundle.set_available_nonretired_nodes(distribution.getIdealStorageNodes(cluster_state, bucket, nonretired_up_states)); - ideal_nodes_bundle.set_available_nonretired_or_maintenance_nodes(distribution.getIdealStorageNodes(cluster_state, bucket, nonretired_or_maintenance_up_states)); + ideal_nodes_bundle.set_nodes(distribution.getIdealStorageNodes(cluster_state, bucket, up_states), + distribution.getIdealStorageNodes(cluster_state, bucket, nonretired_up_states), + distribution.getIdealStorageNodes(cluster_state, bucket, nonretired_or_maintenance_up_states)); } /* diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp index 37d81f45ac1..b686c6bc80c 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp +++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp @@ -314,7 +314,7 @@ DistributorStripe::enterRecoveryMode() { LOG(debug, "Entering recovery mode"); _schedulingMode = MaintenanceScheduler::RECOVERY_SCHEDULING_MODE; - _scanner->reset(); // Just drop accumulated stat on the floor. + (void)_scanner->fetch_and_reset(); // Just drop accumulated stats on the floor. // We enter recovery mode due to cluster state or distribution config changes. // Until we have completed a new DB scan round, we don't know the state of our // newly owned buckets and must not report stats for these out to the cluster @@ -643,7 +643,7 @@ DistributorStripe::updateInternalMetricsForCompletedScan() _bucketDBMetricUpdater.completeRound(); _bucketDbStats = _bucketDBMetricUpdater.getLastCompleteStats(); - _maintenanceStats = _scanner->reset(); + _maintenanceStats = _scanner->fetch_and_reset(); auto new_space_stats = toBucketSpacesStats(_maintenanceStats.perNodeStats); if (merge_no_longer_pending_edge(_bucketSpacesStats, new_space_stats)) { _must_send_updated_host_info = true; diff --git a/storage/src/vespa/storage/distributor/distributor_stripe_component.cpp b/storage/src/vespa/storage/distributor/distributor_stripe_component.cpp index 16cc887096f..47b89b2dd19 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe_component.cpp +++ b/storage/src/vespa/storage/distributor/distributor_stripe_component.cpp @@ -5,6 +5,7 @@ #include "distributor_bucket_space.h" #include "pendingmessagetracker.h" #include "storage_node_up_states.h" +#include <vespa/storage/storageutil/utils.h> #include <vespa/storageframework/generic/clock/clock.h> #include <vespa/document/select/parser.h> #include <vespa/vdslib/state/cluster_state_bundle.h> @@ -53,18 +54,19 @@ class UpdateBucketDatabaseProcessor : public BucketDatabase::EntryUpdateProcesso const std::vector<BucketCopy>& _changed_nodes; std::vector<uint16_t> _ideal_nodes; bool _reset_trusted; + using ConstNodesRef = IdealServiceLayerNodesBundle::ConstNodesRef; public: - UpdateBucketDatabaseProcessor(const framework::Clock& clock, const std::vector<BucketCopy>& changed_nodes, std::vector<uint16_t> ideal_nodes, bool reset_trusted); + UpdateBucketDatabaseProcessor(const framework::Clock& clock, const std::vector<BucketCopy>& changed_nodes, ConstNodesRef ideal_nodes, bool reset_trusted); ~UpdateBucketDatabaseProcessor() override; BucketDatabase::Entry create_entry(const document::BucketId& bucket) const override; bool process_entry(BucketDatabase::Entry &entry) const override; }; -UpdateBucketDatabaseProcessor::UpdateBucketDatabaseProcessor(const framework::Clock& clock, const std::vector<BucketCopy>& changed_nodes, std::vector<uint16_t> ideal_nodes, bool reset_trusted) +UpdateBucketDatabaseProcessor::UpdateBucketDatabaseProcessor(const framework::Clock& clock, const std::vector<BucketCopy>& changed_nodes, ConstNodesRef ideal_nodes, bool reset_trusted) : BucketDatabase::EntryUpdateProcessor(), _clock(clock), _changed_nodes(changed_nodes), - _ideal_nodes(std::move(ideal_nodes)), + _ideal_nodes(ideal_nodes.cbegin(), ideal_nodes.cend()), _reset_trusted(reset_trusted) { } @@ -244,4 +246,14 @@ DistributorStripeComponent::parse_selection(const vespalib::string& selection) c return parser.parse(selection); } +void +DistributorStripeComponent::update_bucket_database(const document::Bucket& bucket, const BucketCopy& changed_node, uint32_t update_flags) { + update_bucket_database(bucket, toVector<BucketCopy>(changed_node),update_flags); +} + +void +DistributorStripeComponent::remove_node_from_bucket_database(const document::Bucket& bucket, uint16_t node_index) { + remove_nodes_from_bucket_database(bucket, toVector<uint16_t>(node_index)); +} + } diff --git a/storage/src/vespa/storage/distributor/distributor_stripe_component.h b/storage/src/vespa/storage/distributor/distributor_stripe_component.h index 8bf507f3fac..8fd439992f7 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe_component.h +++ b/storage/src/vespa/storage/distributor/distributor_stripe_component.h @@ -8,7 +8,6 @@ #include "operationowner.h" #include "statechecker.h" #include <vespa/storage/common/distributorcomponent.h> -#include <vespa/storage/storageutil/utils.h> #include <vespa/storageapi/messageapi/storagecommand.h> #include <vespa/storageapi/buckets/bucketinfo.h> @@ -68,10 +67,7 @@ public: /** * Simple API for the common case of modifying a single node. */ - void update_bucket_database(const document::Bucket& bucket, const BucketCopy& changed_node, uint32_t update_flags) override { - update_bucket_database(bucket, toVector<BucketCopy>(changed_node),update_flags); - } - + void update_bucket_database(const document::Bucket& bucket, const BucketCopy& changed_node, uint32_t update_flags) override; /** * Adds the given copies to the bucket database. */ @@ -82,9 +78,7 @@ public: * If the resulting bucket is empty afterwards, removes the entire * bucket entry from the bucket database. */ - void remove_node_from_bucket_database(const document::Bucket& bucket, uint16_t node_index) override { - remove_nodes_from_bucket_database(bucket, toVector<uint16_t>(node_index)); - } + void remove_node_from_bucket_database(const document::Bucket& bucket, uint16_t node_index) override; /** * Removes the given bucket copies from the bucket database. diff --git a/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.cpp b/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.cpp index cc4eedd2a35..1ce5e5c589f 100644 --- a/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.cpp +++ b/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.cpp @@ -1,30 +1,60 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "ideal_service_layer_nodes_bundle.h" -#include <vespa/vdslib/distribution/idealnodecalculator.h> -#include <vespa/vespalib/stllike/hash_set_insert.hpp> - +#include <vespa/vespalib/stllike/hash_map.hpp> namespace storage::distributor { -IdealServiceLayerNodesBundle::IdealServiceLayerNodesBundle() noexcept - : _available_nodes(), - _available_nonretired_nodes(), - _available_nonretired_or_maintenance_nodes(), - _unordered_nonretired_or_maintenance_nodes() -{ +namespace { +constexpr size_t BUILD_HASH_LIMIT = 32; } +struct IdealServiceLayerNodesBundle::LookupMap : public vespalib::hash_map<uint16_t, Index> { + using Parent = vespalib::hash_map<uint16_t, Index>; + using Parent::Parent; +}; + +IdealServiceLayerNodesBundle::IdealServiceLayerNodesBundle() noexcept = default; +IdealServiceLayerNodesBundle::IdealServiceLayerNodesBundle(IdealServiceLayerNodesBundle &&) noexcept = default; +IdealServiceLayerNodesBundle::~IdealServiceLayerNodesBundle() = default; + void -IdealServiceLayerNodesBundle::set_available_nonretired_or_maintenance_nodes(std::vector<uint16_t> available_nonretired_or_maintenance_nodes) { - _available_nonretired_or_maintenance_nodes = std::move(available_nonretired_or_maintenance_nodes); - _unordered_nonretired_or_maintenance_nodes.clear(); - _unordered_nonretired_or_maintenance_nodes.insert(_available_nonretired_or_maintenance_nodes.begin(), - _available_nonretired_or_maintenance_nodes.end()); +IdealServiceLayerNodesBundle::set_nodes(ConstNodesRef nodes, + ConstNodesRef nonretired_nodes, + ConstNodesRef nonretired_or_maintenance_nodes) +{ + _nodes.clear(); + _nodes.reserve(nodes.size() + nonretired_nodes.size() + nonretired_or_maintenance_nodes.size()); + std::for_each(nodes.cbegin(), nodes.cend(), [this](uint16_t n) { _nodes.emplace_back(n); }); + _available_sz = nodes.size(); + std::for_each(nonretired_nodes.cbegin(), nonretired_nodes.cend(), [this](uint16_t n) { _nodes.emplace_back(n); }); + _nonretired_sz = nonretired_nodes.size(); + std::for_each(nonretired_or_maintenance_nodes.cbegin(), nonretired_or_maintenance_nodes.cend(), [this](uint16_t n) { _nodes.emplace_back(n); }); + + if (nonretired_or_maintenance_nodes.size() > BUILD_HASH_LIMIT) { + _nonretired_or_maintenance_node_2_index = std::make_unique<LookupMap>(nonretired_or_maintenance_nodes.size()); + for (uint16_t i(0); i < nonretired_or_maintenance_nodes.size(); i++) { + _nonretired_or_maintenance_node_2_index->insert(std::make_pair(nonretired_or_maintenance_nodes[i], Index(i))); + } + } } -IdealServiceLayerNodesBundle::IdealServiceLayerNodesBundle(IdealServiceLayerNodesBundle &&) noexcept = default; +IdealServiceLayerNodesBundle::Index +IdealServiceLayerNodesBundle::ConstNodesRef2Index::lookup(uint16_t node) const noexcept { + for (uint16_t i(0); i < _idealState.size(); i++) { + if (node == _idealState[i]) return Index(i); + } + return Index::invalid(); +} -IdealServiceLayerNodesBundle::~IdealServiceLayerNodesBundle() = default; +IdealServiceLayerNodesBundle::Index +IdealServiceLayerNodesBundle::nonretired_or_maintenance_index(uint16_t node) const noexcept { + if (_nonretired_or_maintenance_node_2_index) { + const auto found = _nonretired_or_maintenance_node_2_index->find(node); + return (found != _nonretired_or_maintenance_node_2_index->end()) ? found->second : Index::invalid(); + } else { + return ConstNodesRef2Index(available_nonretired_or_maintenance_nodes()).lookup(node); + } +} } diff --git a/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.h b/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.h index 9577ec09208..1fce5bf0813 100644 --- a/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.h +++ b/storage/src/vespa/storage/distributor/ideal_service_layer_nodes_bundle.h @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/vespalib/util/small_vector.h> namespace storage::distributor { @@ -9,30 +9,63 @@ namespace storage::distributor { * Bundle of ideal service layer nodes for a bucket. */ class IdealServiceLayerNodesBundle { - std::vector<uint16_t> _available_nodes; - std::vector<uint16_t> _available_nonretired_nodes; - std::vector<uint16_t> _available_nonretired_or_maintenance_nodes; - vespalib::hash_set<uint16_t> _unordered_nonretired_or_maintenance_nodes; public: + using ConstNodesRef = vespalib::ConstArrayRef<uint16_t>; + class Index { + public: + constexpr explicit Index(uint16_t index) noexcept : _index(index) {} + constexpr bool valid() const noexcept { + return _index < MAX_INDEX; + } + constexpr operator uint16_t () const noexcept { return _index; } + static constexpr Index invalid() noexcept { return Index(MAX_INDEX); } + private: + static constexpr uint16_t MAX_INDEX = 0xffff; + uint16_t _index; + }; + struct Node2Index { + virtual ~Node2Index() = default; + virtual Index lookup(uint16_t node) const noexcept = 0; + }; + class NonRetiredOrMaintenance2Index final : public Node2Index { + public: + NonRetiredOrMaintenance2Index(const IdealServiceLayerNodesBundle & idealState) noexcept : _idealState(idealState) {} + Index lookup(uint16_t node) const noexcept override { + return _idealState.nonretired_or_maintenance_index(node); + } + private: + const IdealServiceLayerNodesBundle & _idealState; + }; + class ConstNodesRef2Index final : public Node2Index { + public: + ConstNodesRef2Index(ConstNodesRef idealState) noexcept : _idealState(idealState) {} + Index lookup(uint16_t node) const noexcept override; + private: + ConstNodesRef _idealState; + }; IdealServiceLayerNodesBundle() noexcept; IdealServiceLayerNodesBundle(IdealServiceLayerNodesBundle &&) noexcept; ~IdealServiceLayerNodesBundle(); - void set_available_nodes(std::vector<uint16_t> available_nodes) { - _available_nodes = std::move(available_nodes); - } - void set_available_nonretired_nodes(std::vector<uint16_t> available_nonretired_nodes) { - _available_nonretired_nodes = std::move(available_nonretired_nodes); - } - void set_available_nonretired_or_maintenance_nodes(std::vector<uint16_t> available_nonretired_or_maintenance_nodes); - const std::vector<uint16_t> & available_nodes() const noexcept { return _available_nodes; } - const std::vector<uint16_t> & available_nonretired_nodes() const noexcept { return _available_nonretired_nodes; } - const std::vector<uint16_t> & available_nonretired_or_maintenance_nodes() const noexcept { - return _available_nonretired_or_maintenance_nodes; + void set_nodes(ConstNodesRef nodes, ConstNodesRef nonretired_nodes, ConstNodesRef nonretired_or_maintenance_nodes); + ConstNodesRef available_nodes() const noexcept { return {_nodes.data(), _available_sz}; } + ConstNodesRef available_nonretired_nodes() const noexcept { return {_nodes.data() + _available_sz, _nonretired_sz}; } + ConstNodesRef available_nonretired_or_maintenance_nodes() const noexcept { + uint16_t offset = _available_sz + _nonretired_sz; + return {_nodes.data() + offset, _nodes.size() - offset}; } bool is_nonretired_or_maintenance(uint16_t node) const noexcept { - return _unordered_nonretired_or_maintenance_nodes.contains(node); + return nonretired_or_maintenance_index(node) != Index::invalid(); } + NonRetiredOrMaintenance2Index nonretired_or_maintenance_to_index() const noexcept { return {*this}; } + ConstNodesRef2Index available_to_index() const noexcept { return {available_nodes()}; } +private: + struct LookupMap; + Index nonretired_or_maintenance_index(uint16_t node) const noexcept; + vespalib::SmallVector<uint16_t,16> _nodes; + std::unique_ptr<LookupMap> _nonretired_or_maintenance_node_2_index; + uint16_t _available_sz; + uint16_t _nonretired_sz; }; } diff --git a/storage/src/vespa/storage/distributor/idealstatemetricsset.cpp b/storage/src/vespa/storage/distributor/idealstatemetricsset.cpp index d50b2004bf2..ea345176dd0 100644 --- a/storage/src/vespa/storage/distributor/idealstatemetricsset.cpp +++ b/storage/src/vespa/storage/distributor/idealstatemetricsset.cpp @@ -134,7 +134,7 @@ IdealStateMetricSet::IdealStateMetricSet() IdealStateMetricSet::~IdealStateMetricSet() = default; -void IdealStateMetricSet::setPendingOperations(vespalib::ConstArrayRef<uint64_t> newMetrics) { +void IdealStateMetricSet::setPendingOperations(std::span<uint64_t, IdealStateOperation::OPERATION_COUNT> newMetrics) { for (uint32_t i = 0; i < IdealStateOperation::OPERATION_COUNT; i++) { operations[i]->pending.set(newMetrics[i]); } diff --git a/storage/src/vespa/storage/distributor/idealstatemetricsset.h b/storage/src/vespa/storage/distributor/idealstatemetricsset.h index 0bbc13d061a..e51e58ba3a4 100644 --- a/storage/src/vespa/storage/distributor/idealstatemetricsset.h +++ b/storage/src/vespa/storage/distributor/idealstatemetricsset.h @@ -5,7 +5,7 @@ #include <vespa/metrics/valuemetric.h> #include <vespa/metrics/countmetric.h> #include <vespa/storage/distributor/operations/idealstate/idealstateoperation.h> -#include <vespa/vespalib/util/arrayref.h> +#include <span> namespace storage::distributor { @@ -62,7 +62,7 @@ public: IdealStateMetricSet(); ~IdealStateMetricSet() override; - void setPendingOperations(vespalib::ConstArrayRef<uint64_t> newMetrics); + void setPendingOperations(std::span<uint64_t, IdealStateOperation::OPERATION_COUNT> newMetrics); }; } // storage::distributor diff --git a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp index e0c1abaaffa..86399c1b620 100644 --- a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp +++ b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp @@ -49,7 +49,7 @@ SimpleMaintenanceScanner::PendingMaintenanceStats & SimpleMaintenanceScanner::PendingMaintenanceStats::operator = (PendingMaintenanceStats &&) noexcept = default; SimpleMaintenanceScanner::PendingMaintenanceStats -SimpleMaintenanceScanner::PendingMaintenanceStats::reset() { +SimpleMaintenanceScanner::PendingMaintenanceStats::fetch_and_reset() { PendingMaintenanceStats prev = std::move(*this); global = GlobalMaintenanceStats(); perNodeStats.reset(prev.perNodeStats.numNodes()); @@ -78,11 +78,11 @@ SimpleMaintenanceScanner::scanNext() } SimpleMaintenanceScanner::PendingMaintenanceStats -SimpleMaintenanceScanner::reset() +SimpleMaintenanceScanner::fetch_and_reset() { _bucketCursor = document::BucketId(); _bucketSpaceItr = _bucketSpaceRepo.begin(); - return _pendingMaintenance.reset(); + return _pendingMaintenance.fetch_and_reset(); } void diff --git a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h index 35b022c7af7..3d1a57a6422 100644 --- a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h +++ b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h @@ -29,7 +29,7 @@ public: PendingMaintenanceStats(PendingMaintenanceStats &&) noexcept; PendingMaintenanceStats &operator = (PendingMaintenanceStats &&) noexcept; ~PendingMaintenanceStats(); - PendingMaintenanceStats reset(); + [[nodiscard]] PendingMaintenanceStats fetch_and_reset(); GlobalMaintenanceStats global; NodeMaintenanceStatsTracker perNodeStats; @@ -53,7 +53,7 @@ public: ~SimpleMaintenanceScanner() override; ScanResult scanNext() override; - PendingMaintenanceStats reset(); + [[nodiscard]] PendingMaintenanceStats fetch_and_reset(); // TODO: move out into own interface! void prioritizeBucket(const document::Bucket &id); diff --git a/storage/src/vespa/storage/distributor/messagetracker.cpp b/storage/src/vespa/storage/distributor/messagetracker.cpp index 28fbaad4619..842238aa24c 100644 --- a/storage/src/vespa/storage/distributor/messagetracker.cpp +++ b/storage/src/vespa/storage/distributor/messagetracker.cpp @@ -20,7 +20,7 @@ MessageTracker::~MessageTracker() = default; void MessageTracker::flushQueue(MessageSender& sender) { - _sentMessages.resize(_commandQueue.size()); + _sentMessages.resize(_sentMessages.size() + _commandQueue.size()); for (const auto & toSend : _commandQueue) { toSend._msg->setAddress(api::StorageMessageAddress::create(_cluster_ctx.cluster_name_ptr(), lib::NodeType::STORAGE, toSend._target)); _sentMessages[toSend._msg->getMsgId()] = toSend._target; diff --git a/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp index 86ea9a559f5..854e7d15f82 100644 --- a/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp @@ -10,7 +10,6 @@ #include <vespa/storage/distributor/storage_node_up_states.h> #include <vespa/storageapi/message/persistence.h> #include <vespa/vdslib/distribution/distribution.h> -#include <vespa/vdslib/distribution/idealnodecalculatorimpl.h> #include <vespa/vdslib/state/clusterstate.h> #include <algorithm> @@ -67,13 +66,11 @@ PutOperation::insertDatabaseEntryAndScheduleCreateBucket(const OperationTargetLi assert(!multipleBuckets); (void) multipleBuckets; BucketDatabase::Entry entry(_bucket_space.getBucketDatabase().get(lastBucket)); - const std::vector<uint16_t> & idealState = _bucket_space.get_ideal_service_layer_nodes_bundle( - lastBucket).available_nodes(); - active = ActiveCopy::calculate(idealState, _bucket_space.getDistribution(), entry, + active = ActiveCopy::calculate(_bucket_space.get_ideal_service_layer_nodes_bundle(lastBucket).available_to_index(), _bucket_space.getDistribution(), entry, _op_ctx.distributor_config().max_activation_inhibited_out_of_sync_groups()); LOG(debug, "Active copies for bucket %s: %s", entry.getBucketId().toString().c_str(), active.toString().c_str()); for (uint32_t i=0; i<active.size(); ++i) { - BucketCopy copy(*entry->getNode(active[i]._nodeIndex)); + BucketCopy copy(*entry->getNode(active[i].nodeIndex())); copy.setActive(true); entry->updateNode(copy); } diff --git a/storage/src/vespa/storage/distributor/operationtargetresolver.h b/storage/src/vespa/storage/distributor/operationtargetresolver.h index 5e3c4a73f66..2de477d03e5 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolver.h +++ b/storage/src/vespa/storage/distributor/operationtargetresolver.h @@ -15,23 +15,23 @@ namespace storage::distributor { class OperationTarget : public vespalib::AsciiPrintable { document::Bucket _bucket; - lib::Node _node; - bool _newCopy; + lib::Node _node; + bool _newCopy; public: - OperationTarget() : _newCopy(true) {} - OperationTarget(const document::Bucket& bucket, const lib::Node& node, bool newCopy) + OperationTarget() noexcept : _newCopy(true) {} + OperationTarget(const document::Bucket& bucket, const lib::Node& node, bool newCopy) noexcept : _bucket(bucket), _node(node), _newCopy(newCopy) {} - document::BucketId getBucketId() const { return _bucket.getBucketId(); } - document::Bucket getBucket() const { return _bucket; } - const lib::Node& getNode() const { return _node; } - bool isNewCopy() const { return _newCopy; } + document::BucketId getBucketId() const noexcept { return _bucket.getBucketId(); } + document::Bucket getBucket() const noexcept { return _bucket; } + const lib::Node& getNode() const noexcept { return _node; } + bool isNewCopy() const noexcept { return _newCopy; } - bool operator==(const OperationTarget& o) const { + bool operator==(const OperationTarget& o) const noexcept { return (_bucket == o._bucket && _node == o._node && _newCopy == o._newCopy); } - bool operator!=(const OperationTarget& o) const { + bool operator!=(const OperationTarget& o) const noexcept { return !(operator==(o)); } @@ -40,13 +40,13 @@ public: class OperationTargetList : public std::vector<OperationTarget> { public: - bool hasAnyNewCopies() const { + bool hasAnyNewCopies() const noexcept { for (size_t i=0; i<size(); ++i) { if (operator[](i).isNewCopy()) return true; } return false; } - bool hasAnyExistingCopies() const { + bool hasAnyExistingCopies() const noexcept { for (size_t i=0; i<size(); ++i) { if (!operator[](i).isNewCopy()) return true; } @@ -63,8 +63,7 @@ public: PUT }; - virtual OperationTargetList getTargets(OperationType type, - const document::BucketId& id) = 0; + virtual OperationTargetList getTargets(OperationType type, const document::BucketId& id) = 0; }; } diff --git a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp index 736d8c692e3..eb08cf51f43 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp +++ b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp @@ -9,22 +9,8 @@ namespace storage::distributor { -namespace { - -lib::IdealNodeList -make_node_list(const std::vector<uint16_t>& nodes) -{ - lib::IdealNodeList list; - for (auto node : nodes) { - list.push_back(lib::Node(lib::NodeType::STORAGE, node)); - } - return list; -} - -} - BucketInstance::BucketInstance(const document::BucketId& id, const api::BucketInfo& info, lib::Node node, - uint16_t idealLocationPriority, bool trusted, bool exist) + uint16_t idealLocationPriority, bool trusted, bool exist) noexcept : _bucket(id), _info(info), _node(node), _idealLocationPriority(idealLocationPriority), _trusted(trusted), _exist(exist) { @@ -44,19 +30,19 @@ BucketInstance::print(vespalib::asciistream& out, const PrintProperties&) const bool BucketInstanceList::contains(lib::Node node) const { - for (uint32_t i=0; i<_instances.size(); ++i) { - if (_instances[i]._node == node) return true; + for (const auto & instance : _instances) { + if (instance._node == node) return true; } return false; } void -BucketInstanceList::add(const BucketDatabase::Entry& e, const lib::IdealNodeList& idealState) +BucketInstanceList::add(const BucketDatabase::Entry& e, const IdealServiceLayerNodesBundle::Node2Index & idealState) { for (uint32_t i = 0; i < e.getBucketInfo().getNodeCount(); ++i) { const BucketCopy& copy(e.getBucketInfo().getNodeRef(i)); lib::Node node(lib::NodeType::STORAGE, copy.getNode()); - _instances.emplace_back(e.getBucketId(), copy.getBucketInfo(), node, idealState.indexOf(node), copy.trusted()); + _instances.emplace_back(e.getBucketId(), copy.getBucketInfo(), node, idealState.lookup(copy.getNode()), copy.trusted(), true); } } @@ -66,8 +52,8 @@ BucketInstanceList::populate(const document::BucketId& specificId, const Distrib std::vector<BucketDatabase::Entry> entries; db.getParents(specificId, entries); for (const auto & entry : entries) { - lib::IdealNodeList idealNodes(make_node_list(distributor_bucket_space.get_ideal_service_layer_nodes_bundle(entry.getBucketId()).available_nonretired_or_maintenance_nodes())); - add(entry, idealNodes); + auto node2Index = distributor_bucket_space.get_ideal_service_layer_nodes_bundle(entry.getBucketId()).nonretired_or_maintenance_to_index(); + add(entry, node2Index); } } @@ -96,7 +82,7 @@ BucketInstanceList::limitToRedundancyCopies(uint16_t redundancy) document::BucketId BucketInstanceList::leastSpecificLeafBucketInSubtree(const document::BucketId& candidateId, const document::BucketId& mostSpecificId, - const BucketDatabase& db) const + const BucketDatabase& db) { assert(candidateId.contains(mostSpecificId)); document::BucketId treeNode = candidateId; @@ -110,18 +96,17 @@ BucketInstanceList::leastSpecificLeafBucketInSubtree(const document::BucketId& c } void -BucketInstanceList::extendToEnoughCopies(const DistributorBucketSpace& distributor_bucket_space, - const BucketDatabase& db, - const document::BucketId& targetIfNonPreExisting, - const document::BucketId& mostSpecificId) +BucketInstanceList::extendToEnoughCopies(const DistributorBucketSpace& distributor_bucket_space, const BucketDatabase& db, + const document::BucketId& targetIfNonPreExisting, const document::BucketId& mostSpecificId) { document::BucketId newTarget(_instances.empty() ? targetIfNonPreExisting : _instances[0]._bucket); newTarget = leastSpecificLeafBucketInSubtree(newTarget, mostSpecificId, db); - lib::IdealNodeList idealNodes(make_node_list(distributor_bucket_space.get_ideal_service_layer_nodes_bundle(newTarget).available_nonretired_nodes())); + const auto & idealNodes = distributor_bucket_space.get_ideal_service_layer_nodes_bundle(newTarget).available_nonretired_nodes(); for (uint32_t i=0; i<idealNodes.size(); ++i) { - if (!contains(idealNodes[i])) { - _instances.emplace_back(newTarget, api::BucketInfo(), idealNodes[i], i, false, false); + lib::Node node(lib::NodeType::STORAGE, idealNodes[i]); + if (!contains(node)) { + _instances.emplace_back(newTarget, api::BucketInfo(), node, i, false, false); } } } @@ -131,7 +116,7 @@ BucketInstanceList::createTargets(document::BucketSpace bucketSpace) { OperationTargetList result; for (const auto& bi : _instances) { - result.push_back(OperationTarget(document::Bucket(bucketSpace, bi._bucket), bi._node, !bi._exist)); + result.emplace_back(document::Bucket(bucketSpace, bi._bucket), bi._node, !bi._exist); } return result; } diff --git a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h index 0caeee466e0..b76388da9bc 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h +++ b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h @@ -3,8 +3,8 @@ #pragma once #include "operationtargetresolver.h" +#include "ideal_service_layer_nodes_bundle.h" #include <vespa/storage/bucketdb/bucketdatabase.h> -#include <vespa/vdslib/distribution/idealnodecalculator.h> #include <algorithm> namespace storage::distributor { @@ -19,11 +19,11 @@ struct BucketInstance : public vespalib::AsciiPrintable { bool _trusted; bool _exist; - BucketInstance() : _idealLocationPriority(0xffff), - _trusted(false), _exist(false) {} + BucketInstance() noexcept + : _idealLocationPriority(0xffff), _trusted(false), _exist(false) {} BucketInstance(const document::BucketId& id, const api::BucketInfo& info, lib::Node node, uint16_t idealLocationPriority, bool trusted, - bool exist = true); + bool exist) noexcept; void print(vespalib::asciistream& out, const PrintProperties&) const override; }; @@ -42,10 +42,10 @@ class BucketInstanceList : public vespalib::AsciiPrintable { * Postconditions: * <return value>.contains(mostSpecificId) */ - document::BucketId leastSpecificLeafBucketInSubtree( - const document::BucketId& candidateId, - const document::BucketId& mostSpecificId, - const BucketDatabase& db) const; + static document::BucketId + leastSpecificLeafBucketInSubtree(const document::BucketId& candidateId, + const document::BucketId& mostSpecificId, + const BucketDatabase& db); public: void add(const BucketInstance& instance) { _instances.push_back(instance); } @@ -65,7 +65,7 @@ public: const document::BucketId& mostSpecificId); void populate(const document::BucketId&, const DistributorBucketSpace&, BucketDatabase&); - void add(const BucketDatabase::Entry& e, const lib::IdealNodeList& idealState); + void add(const BucketDatabase::Entry& e, const IdealServiceLayerNodesBundle::Node2Index & idealState); template <typename Order> void sort(const Order& order) { @@ -79,9 +79,9 @@ public: class OperationTargetResolverImpl : public OperationTargetResolver { const DistributorBucketSpace& _distributor_bucket_space; - BucketDatabase& _bucketDatabase; - uint32_t _minUsedBucketBits; - uint16_t _redundancy; + BucketDatabase& _bucketDatabase; + uint32_t _minUsedBucketBits; + uint16_t _redundancy; document::BucketSpace _bucketSpace; public: @@ -97,8 +97,7 @@ public: _bucketSpace(bucketSpace) {} - BucketInstanceList getAllInstances(OperationType type, - const document::BucketId& id); + BucketInstanceList getAllInstances(OperationType type, const document::BucketId& id); BucketInstanceList getInstances(OperationType type, const document::BucketId& id) { BucketInstanceList result(getAllInstances(type, id)); result.limitToRedundancyCopies(_redundancy); diff --git a/storage/src/vespa/storage/distributor/statechecker.h b/storage/src/vespa/storage/distributor/statechecker.h index 25918e7a047..d120b5e62d7 100644 --- a/storage/src/vespa/storage/distributor/statechecker.h +++ b/storage/src/vespa/storage/distributor/statechecker.h @@ -77,7 +77,9 @@ public: const bool merges_inhibited_in_bucket_space; const BucketDatabase::Entry& getSiblingEntry() const noexcept { return siblingEntry; } - const std::vector<uint16_t> & idealState() const noexcept { return idealStateBundle.available_nonretired_or_maintenance_nodes(); } + IdealServiceLayerNodesBundle::ConstNodesRef idealState() const noexcept { + return idealStateBundle.available_nonretired_or_maintenance_nodes(); + } document::Bucket getBucket() const noexcept { return bucket; } document::BucketId getBucketId() const noexcept { return bucket.getBucketId(); } diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 43766225155..2aef2d17f54 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -145,8 +145,10 @@ JoinBucketsStateChecker::isFirstSibling(const document::BucketId& bucketId) namespace { +using ConstNodesRef = IdealServiceLayerNodesBundle::ConstNodesRef; + bool -equalNodeSet(const std::vector<uint16_t>& idealState, const BucketDatabase::Entry& dbEntry) +equalNodeSet(ConstNodesRef idealState, const BucketDatabase::Entry& dbEntry) { if (idealState.size() != dbEntry->getNodeCount()) { return false; @@ -187,6 +189,42 @@ inconsistentJoinIsAllowed(const StateChecker::Context& context) && bucketAndSiblingReplicaLocationsEqualIdealState(context)); } +bool +isInconsistentlySplit(const StateChecker::Context& c) +{ + return (c.entries.size() > 1); +} + +// We don't want to invoke joins on buckets that have more replicas than +// required. This is in particular because joins cause ideal states to change +// for the target buckets and trigger merges. Since the removal of the non- +// ideal replicas is done by the DeleteBuckets state-checker, it will become +// preempted by potential follow-up joins unless we explicitly avoid these. +bool +contextBucketHasTooManyReplicas(const StateChecker::Context& c) +{ + return (c.entry->getNodeCount() > c.distribution.getRedundancy()); +} + +bool +bucketAtDistributionBitLimit(const document::BucketId& bucket, const StateChecker::Context& c) +{ + return (bucket.getUsedBits() <= std::max(uint32_t(c.systemState.getDistributionBitCount()), + c.distributorConfig.getMinimalBucketSplit())); +} + +bool +legalBucketSplitLevel(const document::BucketId& bucket, const StateChecker::Context& c) +{ + return bucket.getUsedBits() >= c.distributorConfig.getMinimalBucketSplit(); +} + +bool +bucketHasMultipleChildren(const document::BucketId& bucket, const StateChecker::Context& c) +{ + return c.db.childCount(bucket) > 1; +} + } // anon ns bool @@ -246,28 +284,6 @@ JoinBucketsStateChecker::singleBucketJoinIsEnabled(const Context& c) return c.distributorConfig.getEnableJoinForSiblingLessBuckets(); } -namespace { - -// We don't want to invoke joins on buckets that have more replicas than -// required. This is in particular because joins cause ideal states to change -// for the target buckets and trigger merges. Since the removal of the non- -// ideal replicas is done by the DeleteBuckets state-checker, it will become -// preempted by potential follow-up joins unless we explicitly avoid these. -bool -contextBucketHasTooManyReplicas(const StateChecker::Context& c) -{ - return (c.entry->getNodeCount() > c.distribution.getRedundancy()); -} - -bool -bucketAtDistributionBitLimit(const document::BucketId& bucket, const StateChecker::Context& c) -{ - return (bucket.getUsedBits() <= std::max(uint32_t(c.systemState.getDistributionBitCount()), - c.distributorConfig.getMinimalBucketSplit())); -} - -} - bool JoinBucketsStateChecker::shouldJoin(const Context& c) { @@ -361,22 +377,6 @@ JoinBucketsStateChecker::smallEnoughToJoin(const Context& c) return true; } -namespace { - -bool -legalBucketSplitLevel(const document::BucketId& bucket, const StateChecker::Context& c) -{ - return bucket.getUsedBits() >= c.distributorConfig.getMinimalBucketSplit(); -} - -bool -bucketHasMultipleChildren(const document::BucketId& bucket, const StateChecker::Context& c) -{ - return c.db.childCount(bucket) > 1; -} - -} - document::Bucket JoinBucketsStateChecker::computeJoinBucket(const Context& c) { @@ -482,16 +482,6 @@ SplitInconsistentStateChecker::getReason(const document::BucketId& bucketId, con return reason.str(); } -namespace { - -bool -isInconsistentlySplit(const StateChecker::Context& c) -{ - return (c.entries.size() > 1); -} - -} - StateChecker::Result SplitInconsistentStateChecker::check(Context& c) const { @@ -513,7 +503,8 @@ SplitInconsistentStateChecker::check(Context& c) const namespace { -bool containsMaintenanceNode(const std::vector<uint16_t>& ideal, const StateChecker::Context& c) +bool +containsMaintenanceNode(ConstNodesRef ideal, const StateChecker::Context& c) { for (uint16_t n : ideal) { if (c.systemState.getNodeState(lib::Node(lib::NodeType::STORAGE, n)).getState() == lib::State::MAINTENANCE) { @@ -523,7 +514,8 @@ bool containsMaintenanceNode(const std::vector<uint16_t>& ideal, const StateChec return false; } -bool ideal_node_is_unavailable_in_pending_state(const StateChecker::Context& c) { +bool +ideal_node_is_unavailable_in_pending_state(const StateChecker::Context& c) { if (!c.pending_cluster_state) { return false; } @@ -536,7 +528,7 @@ bool ideal_node_is_unavailable_in_pending_state(const StateChecker::Context& c) } bool -consistentApartFromEmptyBucketsInNonIdealLocationAndInvalidEntries(const std::vector<uint16_t>& idealNodes, const BucketInfo& entry) +consistentApartFromEmptyBucketsInNonIdealLocationAndInvalidEntries(ConstNodesRef idealNodes, const BucketInfo& entry) { api::BucketInfo info; for (uint32_t i=0, n=entry.getNodeCount(); i<n; ++i) { @@ -820,7 +812,7 @@ DeleteExtraCopiesStateChecker::bucketHasNoData(const Context& c) bool DeleteExtraCopiesStateChecker::copyIsInIdealState(const BucketCopy& cp, const Context& c) { - return hasItem(c.idealState(), cp.getNode()); + return c.idealStateBundle.is_nonretired_or_maintenance(cp.getNode()); } bool @@ -940,7 +932,7 @@ bool BucketStateStateChecker::shouldSkipActivationDueToMaintenance(const ActiveList& activeNodes, const Context& c) { for (uint32_t i = 0; i < activeNodes.size(); ++i) { - const auto node_index = activeNodes[i]._nodeIndex; + const auto node_index = activeNodes[i].nodeIndex(); const BucketCopy* cp(c.entry->getNode(node_index)); if (!cp || cp->active()) { continue; @@ -978,7 +970,8 @@ BucketStateStateChecker::check(Context& c) const return Result::noMaintenanceNeeded(); } - ActiveList activeNodes = ActiveCopy::calculate(c.idealState(), c.distribution, c.entry, + ActiveList activeNodes = ActiveCopy::calculate(c.idealStateBundle.nonretired_or_maintenance_to_index(), + c.distribution, c.entry, c.distributorConfig.max_activation_inhibited_out_of_sync_groups()); if (activeNodes.empty()) { return Result::noMaintenanceNeeded(); @@ -990,12 +983,12 @@ BucketStateStateChecker::check(Context& c) const vespalib::asciistream reason; std::vector<uint16_t> operationNodes; for (uint32_t i=0; i<activeNodes.size(); ++i) { - const BucketCopy* cp = c.entry->getNode(activeNodes[i]._nodeIndex); + const BucketCopy* cp = c.entry->getNode(activeNodes[i].nodeIndex()); if (cp == nullptr || cp->active()) { continue; } - operationNodes.push_back(activeNodes[i]._nodeIndex); - reason << "[Setting node " << activeNodes[i]._nodeIndex << " as active: " << activeNodes[i].getReason() << "]"; + operationNodes.push_back(activeNodes[i].nodeIndex()); + reason << "[Setting node " << activeNodes[i].nodeIndex() << " as active: " << activeNodes[i].getReason() << "]"; } // Deactivate all copies that are currently marked as active. @@ -1006,7 +999,7 @@ BucketStateStateChecker::check(Context& c) const } bool shouldBeActive = false; for (uint32_t j=0; j<activeNodes.size(); ++j) { - if (activeNodes[j]._nodeIndex == cp.getNode()) { + if (activeNodes[j].nodeIndex() == cp.getNode()) { shouldBeActive = true; } } @@ -1022,7 +1015,7 @@ BucketStateStateChecker::check(Context& c) const std::vector<uint16_t> activeNodeIndexes; for (uint32_t i=0; i<activeNodes.size(); ++i) { - activeNodeIndexes.push_back(activeNodes[i]._nodeIndex); + activeNodeIndexes.push_back(activeNodes[i].nodeIndex()); } auto op = std::make_unique<SetBucketStateOperation>(c.node_ctx, BucketAndNodes(c.getBucket(), operationNodes), activeNodeIndexes); diff --git a/storage/src/vespa/storage/persistence/persistenceutil.h b/storage/src/vespa/storage/persistence/persistenceutil.h index c3fcb68ddc8..4bd0222bb9e 100644 --- a/storage/src/vespa/storage/persistence/persistenceutil.h +++ b/storage/src/vespa/storage/persistence/persistenceutil.h @@ -10,7 +10,6 @@ #include <vespa/persistence/spi/result.h> #include <vespa/persistence/spi/context.h> #include <vespa/vespalib/io/fileutil.h> -#include <vespa/storage/storageutil/utils.h> namespace storage::api { class StorageMessage; diff --git a/storage/src/vespa/storage/storageutil/utils.h b/storage/src/vespa/storage/storageutil/utils.h index debb7e71ace..3d3f5b85d71 100644 --- a/storage/src/vespa/storage/storageutil/utils.h +++ b/storage/src/vespa/storage/storageutil/utils.h @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include <vector> +#include <vespa/vespalib/util/arrayref.h> #include <sstream> namespace storage { @@ -10,50 +10,55 @@ namespace storage { * Creates a vector of the given type with one entry in it. */ template<class A> -std::vector<A> toVector(A entry) { +std::vector<A> +toVector(A entry) { std::vector<A> entries; entries.push_back(entry); return entries; -}; +} /** * Creates a vector of the given type with two entries in it. */ template<class A> -std::vector<A> toVector(A entry, A entry2) { +std::vector<A> +toVector(A entry, A entry2) { std::vector<A> entries; entries.push_back(entry); entries.push_back(entry2); return entries; -}; +} /** * Creates a vector of the given type with two entries in it. */ template<class A> -std::vector<A> toVector(A entry, A entry2, A entry3) { +std::vector<A> +toVector(A entry, A entry2, A entry3) { std::vector<A> entries; entries.push_back(entry); entries.push_back(entry2); entries.push_back(entry3); return entries; -}; +} /** * Creates a vector of the given type with two entries in it. */ template<class A> -std::vector<A> toVector(A entry, A entry2, A entry3, A entry4) { +std::vector<A> +toVector(A entry, A entry2, A entry3, A entry4) { std::vector<A> entries; entries.push_back(entry); entries.push_back(entry2); entries.push_back(entry3); entries.push_back(entry4); return entries; -}; +} template<class A> -std::string dumpVector(const std::vector<A>& vec) { +std::string +dumpVector(const std::vector<A>& vec) { std::ostringstream ost; for (uint32_t i = 0; i < vec.size(); ++i) { if (!ost.str().empty()) { @@ -65,27 +70,5 @@ std::string dumpVector(const std::vector<A>& vec) { return ost.str(); } -template<class A> -bool hasItem(const std::vector<A>& vec, A entry) { - for (uint32_t i = 0; i < vec.size(); ++i) { - if (vec[i] == entry) { - return true; - } - } - - return false; -} - -template<typename T> -struct ConfigReader : public T::Subscriber, public T -{ - T& config; // Alter to inherit T to simplify but kept this for compatability - - ConfigReader(const std::string& configId) : config(*this) { - T::subscribe(configId, *this); - } - void configure(const T& c) { config = c; } -}; - } diff --git a/vdslib/src/tests/distribution/CMakeLists.txt b/vdslib/src/tests/distribution/CMakeLists.txt index c4ae8b0291c..3f3be1e1cad 100644 --- a/vdslib/src/tests/distribution/CMakeLists.txt +++ b/vdslib/src/tests/distribution/CMakeLists.txt @@ -3,7 +3,6 @@ vespa_add_library(vdslib_testdistribution SOURCES distributiontest.cpp grouptest.cpp - idealnodecalculatorimpltest.cpp DEPENDS vdslib GTest::GTest diff --git a/vdslib/src/tests/distribution/distributiontest.cpp b/vdslib/src/tests/distribution/distributiontest.cpp index ec7c05fa7a2..ce07711a069 100644 --- a/vdslib/src/tests/distribution/distributiontest.cpp +++ b/vdslib/src/tests/distribution/distributiontest.cpp @@ -5,7 +5,6 @@ #include <vespa/config/subscription/configuri.h> #include <vespa/fastos/file.h> #include <vespa/vdslib/distribution/distribution.h> -#include <vespa/vdslib/distribution/idealnodecalculator.h> #include <vespa/vdslib/state/clusterstate.h> #include <vespa/vdslib/state/random.h> #include <vespa/vespalib/data/slime/slime.h> @@ -84,6 +83,51 @@ TEST(DistributionTest, test_verify_java_distributions) namespace { +/** +* A list of ideal nodes, sorted in preferred order. Wraps a vector to hide +* unneeded details, and make it easily printable. +*/ +class IdealNodeList : public document::Printable { +public: + IdealNodeList() noexcept; + ~IdealNodeList(); + + void push_back(const Node& node) { + _idealNodes.push_back(node); + } + + const Node& operator[](uint32_t i) const noexcept { return _idealNodes[i]; } + uint32_t size() const noexcept { return _idealNodes.size(); } + bool contains(const Node& n) const noexcept { + return indexOf(n) != 0xffff; + } + uint16_t indexOf(const Node& n) const noexcept { + for (uint16_t i=0; i<_idealNodes.size(); ++i) { + if (n == _idealNodes[i]) return i; + } + return 0xffff; + } + + void print(std::ostream& out, bool, const std::string &) const override; +private: + std::vector<Node> _idealNodes; +}; + +IdealNodeList::IdealNodeList() noexcept = default; +IdealNodeList::~IdealNodeList() = default; + +void +IdealNodeList::print(std::ostream& out, bool , const std::string &) const +{ + out << "["; + for (uint32_t i=0; i<_idealNodes.size(); ++i) { + if (i != 0) out << ", "; + out << _idealNodes[i]; + } + out << "]"; +} + + struct ExpectedResult { ExpectedResult() { } ExpectedResult(const ExpectedResult &) = default; diff --git a/vdslib/src/tests/distribution/idealnodecalculatorimpltest.cpp b/vdslib/src/tests/distribution/idealnodecalculatorimpltest.cpp deleted file mode 100644 index 4159491097c..00000000000 --- a/vdslib/src/tests/distribution/idealnodecalculatorimpltest.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/config-stor-distribution.h> -#include <vespa/vdslib/distribution/idealnodecalculatorimpl.h> -#include <vespa/vdslib/distribution/distribution.h> -#include <vespa/vdslib/state/clusterstate.h> -#include <vespa/vespalib/gtest/gtest.h> - -namespace storage::lib { - -/** - * Class is just a wrapper for distribution, so little needs to be tested. Just - * that: - * - * - get ideal nodes calls gets propagated correctly. - * - Changes in distribution/cluster state is picked up. - */ - -TEST(IdealNodeCalculatorImplTest, test_normal_usage) -{ - ClusterState state("storage:10"); - Distribution distr(Distribution::getDefaultDistributionConfig(3, 10)); - IdealNodeCalculatorImpl impl; - IdealNodeCalculatorConfigurable& configurable(impl); - IdealNodeCalculator& calc(impl); - configurable.setDistribution(distr); - configurable.setClusterState(state); - - std::string expected("[storage.8, storage.9, storage.6]"); - EXPECT_EQ( - expected, - calc.getIdealStorageNodes(document::BucketId(16, 5)).toString()); -} - -} diff --git a/vdslib/src/vespa/vdslib/distribution/CMakeLists.txt b/vdslib/src/vespa/vdslib/distribution/CMakeLists.txt index 0d9342291e8..58ec94eec9c 100644 --- a/vdslib/src/vespa/vdslib/distribution/CMakeLists.txt +++ b/vdslib/src/vespa/vdslib/distribution/CMakeLists.txt @@ -4,7 +4,6 @@ vespa_add_library(vdslib_distribution OBJECT distribution.cpp distribution_config_util.cpp group.cpp - idealnodecalculatorimpl.cpp redundancygroupdistribution.cpp DEPENDS ) diff --git a/vdslib/src/vespa/vdslib/distribution/idealnodecalculator.h b/vdslib/src/vespa/vdslib/distribution/idealnodecalculator.h deleted file mode 100644 index 4eb8f7e04ae..00000000000 --- a/vdslib/src/vespa/vdslib/distribution/idealnodecalculator.h +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * An interface to implement for a calculator calcuting ideal state. It should - * be easy to wrap this calculator in a cache. Thus options that seldom change, - * are taken in as set parameters, such that existing cache can be invalidated. - */ -#pragma once - -#include <vespa/document/bucket/bucketid.h> -#include <vespa/document/util/printable.h> -#include <vespa/vdslib/state/node.h> -#include <vector> -#include <memory> - -namespace storage::lib { - -class Distribution; -class ClusterState; - -/** - * A list of ideal nodes, sorted in preferred order. Wraps a vector to hide - * unneeded details, and make it easily printable. - */ -class IdealNodeList : public document::Printable { -public: - IdealNodeList() noexcept; - ~IdealNodeList(); - - void push_back(const Node& node) { - _idealNodes.push_back(node); - } - - const Node& operator[](uint32_t i) const noexcept { return _idealNodes[i]; } - uint32_t size() const noexcept { return _idealNodes.size(); } - bool contains(const Node& n) const noexcept { - return indexOf(n) != 0xffff; - } - uint16_t indexOf(const Node& n) const noexcept { - for (uint16_t i=0; i<_idealNodes.size(); ++i) { - if (n == _idealNodes[i]) return i; - } - return 0xffff; - } - - void print(std::ostream& out, bool, const std::string &) const override; -private: - std::vector<Node> _idealNodes; -}; - -/** - * Simple interface to use for those who needs to calculate ideal nodes. - */ -class IdealNodeCalculator { -public: - using SP = std::shared_ptr<IdealNodeCalculator>; - enum UpStates { - UpInit, - UpInitMaintenance, - UP_STATE_COUNT - }; - - virtual ~IdealNodeCalculator() = default; - - virtual IdealNodeList getIdealNodes(const NodeType&, const document::BucketId&, UpStates upStates = UpInit) const = 0; - - // Wrapper functions to make prettier call if nodetype is given. - IdealNodeList getIdealDistributorNodes(const document::BucketId& bucket, UpStates upStates = UpInit) const { - return getIdealNodes(NodeType::DISTRIBUTOR, bucket, upStates); - } - IdealNodeList getIdealStorageNodes(const document::BucketId& bucket, UpStates upStates = UpInit) const { - return getIdealNodes(NodeType::STORAGE, bucket, upStates); - } -}; - - -/** - * More complex interface that provides a way to alter needed settings not - * provided in the function call itself. - */ -class IdealNodeCalculatorConfigurable : public IdealNodeCalculator -{ -public: - using SP = std::shared_ptr<IdealNodeCalculatorConfigurable>; - - virtual void setDistribution(const Distribution&) = 0; - virtual void setClusterState(const ClusterState&) = 0; -}; - -} diff --git a/vdslib/src/vespa/vdslib/distribution/idealnodecalculatorimpl.cpp b/vdslib/src/vespa/vdslib/distribution/idealnodecalculatorimpl.cpp deleted file mode 100644 index 86123f47d6f..00000000000 --- a/vdslib/src/vespa/vdslib/distribution/idealnodecalculatorimpl.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "idealnodecalculatorimpl.h" -#include "distribution.h" -#include <vespa/vespalib/util/exceptions.h> -#include <ostream> -#include <cassert> - -namespace storage::lib { - -IdealNodeList::IdealNodeList() noexcept = default; -IdealNodeList::~IdealNodeList() = default; - -void -IdealNodeList::print(std::ostream& out, bool , const std::string &) const -{ - out << "["; - for (uint32_t i=0; i<_idealNodes.size(); ++i) { - if (i != 0) out << ", "; - out << _idealNodes[i]; - } - out << "]"; -} - -IdealNodeCalculatorImpl::IdealNodeCalculatorImpl() - : _distribution(0), - _clusterState(0) -{ - initUpStateMapping(); -} - -IdealNodeCalculatorImpl::~IdealNodeCalculatorImpl() = default; - -void -IdealNodeCalculatorImpl::setDistribution(const Distribution& d) { - _distribution = &d; -} -void -IdealNodeCalculatorImpl::setClusterState(const ClusterState& cs) { - _clusterState = &cs; -} - -IdealNodeList -IdealNodeCalculatorImpl::getIdealNodes(const NodeType& nodeType, - const document::BucketId& bucket, - UpStates upStates) const -{ - assert(_clusterState != 0); - assert(_distribution != 0); - std::vector<uint16_t> nodes; - _distribution->getIdealNodes(nodeType, *_clusterState, bucket, nodes, _upStates[upStates]); - IdealNodeList list; - for (uint32_t i=0; i<nodes.size(); ++i) { - list.push_back(Node(nodeType, nodes[i])); - } - return list; -} - -void -IdealNodeCalculatorImpl::initUpStateMapping() { - _upStates.clear(); - _upStates.resize(UP_STATE_COUNT); - _upStates[UpInit] = "ui"; - _upStates[UpInitMaintenance] = "uim"; - for (uint32_t i=0; i<_upStates.size(); ++i) { - if (_upStates[i] == 0) { - throw vespalib::IllegalStateException("Failed to initialize up state. Code likely not updated " - "after another upstate was added.", VESPA_STRLOC); - } - } -} - -} diff --git a/vdslib/src/vespa/vdslib/distribution/idealnodecalculatorimpl.h b/vdslib/src/vespa/vdslib/distribution/idealnodecalculatorimpl.h deleted file mode 100644 index 9b36f1094fd..00000000000 --- a/vdslib/src/vespa/vdslib/distribution/idealnodecalculatorimpl.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * A cache for an ideal nodes implementation. Making it cheap for localized - * access, regardless of real implementation. - */ -#pragma once - -#include "idealnodecalculator.h" - -namespace storage::lib { - -class IdealNodeCalculatorImpl : public IdealNodeCalculatorConfigurable { - std::vector<const char*> _upStates; - const Distribution* _distribution; - const ClusterState* _clusterState; - -public: - IdealNodeCalculatorImpl(); - ~IdealNodeCalculatorImpl(); - - void setDistribution(const Distribution& d) override; - void setClusterState(const ClusterState& cs) override; - - IdealNodeList getIdealNodes(const NodeType& nodeType, - const document::BucketId& bucket, - UpStates upStates) const override; -private: - void initUpStateMapping(); -}; - -} diff --git a/vespalib/src/vespa/vespalib/stllike/hash_map.cpp b/vespalib/src/vespa/vespalib/stllike/hash_map.cpp index abb88fe674f..50a3d73fe12 100644 --- a/vespalib/src/vespa/vespalib/stllike/hash_map.cpp +++ b/vespalib/src/vespa/vespalib/stllike/hash_map.cpp @@ -16,6 +16,7 @@ VESPALIB_HASH_MAP_INSTANTIATE(vespalib::string, double); VESPALIB_HASH_MAP_INSTANTIATE(int64_t, int32_t); VESPALIB_HASH_MAP_INSTANTIATE(int64_t, uint32_t); VESPALIB_HASH_MAP_INSTANTIATE(int32_t, uint32_t); +VESPALIB_HASH_MAP_INSTANTIATE(uint16_t, uint16_t); VESPALIB_HASH_MAP_INSTANTIATE(uint16_t, uint32_t); VESPALIB_HASH_MAP_INSTANTIATE(uint32_t, int32_t); VESPALIB_HASH_MAP_INSTANTIATE(uint32_t, uint32_t); diff --git a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp index 51a639a3c4e..f711d3d8685 100644 --- a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp +++ b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp @@ -11,6 +11,7 @@ #include <filesystem> using vespalib::make_string_short::fmt; +namespace fs = std::filesystem; namespace vespalib::alloc { @@ -21,7 +22,7 @@ MmapFileAllocator::MmapFileAllocator(const vespalib::string& dir_name) _allocations(), _freelist() { - std::filesystem::create_directories(std::filesystem::path(_dir_name)); + fs::create_directories(fs::path(_dir_name)); _file.open(O_RDWR | O_CREAT | O_TRUNC, false); } @@ -30,7 +31,7 @@ MmapFileAllocator::~MmapFileAllocator() assert(_allocations.empty()); _file.close(); _file.unlink(); - std::filesystem::remove_all(std::filesystem::path(_dir_name)); + fs::remove_all(fs::path(_dir_name)); } uint64_t |